From d7714d84c0c13bbf816eaaac32693e4e75e58a87 Mon Sep 17 00:00:00 2001
From: Seunghoon Lee <lshqqytiger@naver.com>
Date: Sat, 13 Jul 2024 13:47:35 +0900
Subject: [PATCH] Add support of ROCm 6. (#27)

* Add support of ROCm 6.1.2 for Windows.

* Fix CI.

* Use llvm.sqrt.f64.
---
 .github/workflows/pr.yml               |     4 +-
 .github/workflows/rust.yml             |     4 +-
 comgr/README                           |     2 +-
 comgr/src/amd_comgr.rs                 |   646 +-
 comgr/src/lib.rs                       |     2 +-
 hip_common/src/lib.rs                  |     4 +-
 hip_runtime-sys/README                 |     2 +-
 hip_runtime-sys/build.rs               |     2 +-
 hip_runtime-sys/lib/amdhip64.def       |   561 -
 hip_runtime-sys/lib/amdhip64.lib       |   Bin 124886 -> 0 bytes
 hip_runtime-sys/src/hip_runtime_api.rs |  1009 +-
 hipfft-sys/README                      |     2 +-
 hipfft-sys/src/hipfft.rs               |    16 +-
 hiprtc-sys/README                      |     2 +-
 hiprtc-sys/src/hiprtc.rs               |    50 +-
 ptx/src/emit.rs                        |    12 +-
 ptx/src/test/spirv_run/mod.rs          |     2 +-
 rocblas-sys/README                     |     2 +-
 rocblas-sys/src/rocblas.rs             |  3591 +++++-
 rocsolver-sys/README                   |     2 +-
 rocsolver-sys/src/rocsolver.rs         |  1166 +-
 rocsparse-sys/README                   |     2 +-
 rocsparse-sys/src/rocsparse.rs         | 13215 ++++++++++++-----------
 zluda/src/impl/device.rs               |     8 +-
 zluda/src/impl/mod.rs                  |    48 +-
 zluda/src/impl/pointer.rs              |     2 +-
 zluda_blas/src/lib.rs                  |     8 +-
 zluda_sparse/src/lib.rs                |     2 +
 28 files changed, 11747 insertions(+), 8619 deletions(-)
 delete mode 100644 hip_runtime-sys/lib/amdhip64.def
 delete mode 100644 hip_runtime-sys/lib/amdhip64.lib

diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index ea27540..8b5c550 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -5,7 +5,7 @@ on:
 
 env:
   CARGO_TERM_COLOR: always
-  ROCM_VERSION: "5.7.3"
+  ROCM_VERSION: "6.1.3"
 
 jobs:
   build_lin:
@@ -40,7 +40,7 @@ jobs:
           submodules: true
       - name: Install AMD HIP SDK
         run: |
-          C:\msys64\usr\bin\wget.exe https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-Win10-Win11-For-HIP.exe -O "amdgpu-install.exe"
+          C:\msys64\usr\bin\wget.exe https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-Win10-Win11-For-HIP.exe -O "amdgpu-install.exe"
           .\amdgpu-install.exe -Install -View:1
           Start-Sleep -Seconds 60
           $setupId = (Get-Process ATISetup).id
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 2df8c18..642a57e 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -5,7 +5,7 @@ on:
 
 env:
   CARGO_TERM_COLOR: always
-  ROCM_VERSION: "5.7.3"
+  ROCM_VERSION: "6.1.3"
 
 jobs:
   release:
@@ -87,7 +87,7 @@ jobs:
           submodules: true
       - name: Install AMD HIP SDK
         run: |
-          C:\msys64\usr\bin\wget.exe https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-Win10-Win11-For-HIP.exe -O "amdgpu-install.exe"
+          C:\msys64\usr\bin\wget.exe https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-Win10-Win11-For-HIP.exe -O "amdgpu-install.exe"
           .\amdgpu-install.exe -Install -View:1
           Start-Sleep -Seconds 60
           $setupId = (Get-Process ATISetup).id
diff --git a/comgr/README b/comgr/README
index 51959b9..e53a361 100644
--- a/comgr/README
+++ b/comgr/README
@@ -1 +1 @@
-bindgen .\include\amd_comgr.h --size_t-is-usize --must-use-type "amd_comgr_status_t" --no-layout-tests --no-derive-debug --default-enum-style=newtype --dynamic-loading LibComgr --dynamic-link-require-all -o src/amd_comgr.rs --whitelist-function="^amd_comgr_action_data_get_data$|^amd_comgr_action_info_set_isa_name$|^amd_comgr_action_info_set_option_list$|^amd_comgr_create_action_info$|^amd_comgr_create_data$|^amd_comgr_create_data_set$|^amd_comgr_data_set_add$|^amd_comgr_destroy_action_info$|^amd_comgr_destroy_data_set$|^amd_comgr_do_action$|^amd_comgr_get_data$|^amd_comgr_release_data$|^amd_comgr_set_data$|^amd_comgr_set_data_name$|^amd_comgr_action_info_set_language$|^amd_comgr_set_data_name$"
+bindgen $Env:HIP_PATH/include/amd_comgr.h --must-use-type "amd_comgr_status_t" --no-layout-tests --no-derive-debug --default-enum-style=newtype --dynamic-loading LibComgr --dynamic-link-require-all -o src/amd_comgr.rs --allowlist-function="^amd_comgr_action_data_get_data$|^amd_comgr_action_info_set_isa_name$|^amd_comgr_action_info_set_option_list$|^amd_comgr_create_action_info$|^amd_comgr_create_data$|^amd_comgr_create_data_set$|^amd_comgr_data_set_add$|^amd_comgr_destroy_action_info$|^amd_comgr_destroy_data_set$|^amd_comgr_do_action$|^amd_comgr_get_data$|^amd_comgr_release_data$|^amd_comgr_set_data$|^amd_comgr_set_data_name$|^amd_comgr_action_info_set_language$|^amd_comgr_set_data_name$"
\ No newline at end of file
diff --git a/comgr/src/amd_comgr.rs b/comgr/src/amd_comgr.rs
index cae7fa6..e1e6c5f 100644
--- a/comgr/src/amd_comgr.rs
+++ b/comgr/src/amd_comgr.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.60.1 */
+/* automatically generated by rust-bindgen 0.69.4 */
 
 impl amd_comgr_status_s {
     #[doc = " The function has been executed successfully."]
@@ -9,8 +9,7 @@ impl amd_comgr_status_s {
     pub const AMD_COMGR_STATUS_ERROR: amd_comgr_status_s = amd_comgr_status_s(1);
 }
 impl amd_comgr_status_s {
-    #[doc = " One of the actual arguments does not meet a precondition stated"]
-    #[doc = " in the documentation of the corresponding formal argument."]
+    #[doc = " One of the actual arguments does not meet a precondition stated\n in the documentation of the corresponding formal argument. This\n includes both invalid Action types, and invalid arguments to\n valid Action types."]
     pub const AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT: amd_comgr_status_s = amd_comgr_status_s(2);
 }
 impl amd_comgr_status_s {
@@ -20,7 +19,7 @@ impl amd_comgr_status_s {
 #[repr(transparent)]
 #[doc = " @brief Status codes."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)]
-pub struct amd_comgr_status_s(pub ::std::os::raw::c_uint);
+pub struct amd_comgr_status_s(pub ::std::os::raw::c_int);
 #[doc = " @brief Status codes."]
 pub use self::amd_comgr_status_s as amd_comgr_status_t;
 impl amd_comgr_language_s {
@@ -43,14 +42,18 @@ impl amd_comgr_language_s {
     #[doc = " HIP."]
     pub const AMD_COMGR_LANGUAGE_HIP: amd_comgr_language_s = amd_comgr_language_s(4);
 }
+impl amd_comgr_language_s {
+    #[doc = " LLVM IR, either textual (.ll) or bitcode (.bc) format."]
+    pub const AMD_COMGR_LANGUAGE_LLVM_IR: amd_comgr_language_s = amd_comgr_language_s(5);
+}
 impl amd_comgr_language_s {
     #[doc = " Marker for last valid language."]
-    pub const AMD_COMGR_LANGUAGE_LAST: amd_comgr_language_s = amd_comgr_language_s(4);
+    pub const AMD_COMGR_LANGUAGE_LAST: amd_comgr_language_s = amd_comgr_language_s(5);
 }
 #[repr(transparent)]
 #[doc = " @brief The source languages supported by the compiler."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct amd_comgr_language_s(pub ::std::os::raw::c_uint);
+pub struct amd_comgr_language_s(pub ::std::os::raw::c_int);
 #[doc = " @brief The source languages supported by the compiler."]
 pub use self::amd_comgr_language_s as amd_comgr_language_t;
 impl amd_comgr_data_kind_s {
@@ -62,13 +65,11 @@ impl amd_comgr_data_kind_s {
     pub const AMD_COMGR_DATA_KIND_SOURCE: amd_comgr_data_kind_s = amd_comgr_data_kind_s(1);
 }
 impl amd_comgr_data_kind_s {
-    #[doc = " The data is a textual source that is included in the main source"]
-    #[doc = " or other include source."]
+    #[doc = " The data is a textual source that is included in the main source\n or other include source."]
     pub const AMD_COMGR_DATA_KIND_INCLUDE: amd_comgr_data_kind_s = amd_comgr_data_kind_s(2);
 }
 impl amd_comgr_data_kind_s {
-    #[doc = " The data is a precompiled-header source that is included in the main"]
-    #[doc = " source or other include source."]
+    #[doc = " The data is a precompiled-header source that is included in the main\n source or other include source."]
     pub const AMD_COMGR_DATA_KIND_PRECOMPILED_HEADER: amd_comgr_data_kind_s =
         amd_comgr_data_kind_s(3);
 }
@@ -89,9 +90,7 @@ impl amd_comgr_data_kind_s {
     pub const AMD_COMGR_DATA_KIND_RELOCATABLE: amd_comgr_data_kind_s = amd_comgr_data_kind_s(7);
 }
 impl amd_comgr_data_kind_s {
-    #[doc = " The data is an executable machine code object for a specific"]
-    #[doc = " isa. An executable is the kind of code object that can be loaded"]
-    #[doc = " and executed."]
+    #[doc = " The data is an executable machine code object for a specific\n isa. An executable is the kind of code object that can be loaded\n and executed."]
     pub const AMD_COMGR_DATA_KIND_EXECUTABLE: amd_comgr_data_kind_s = amd_comgr_data_kind_s(8);
 }
 impl amd_comgr_data_kind_s {
@@ -102,320 +101,149 @@ impl amd_comgr_data_kind_s {
     #[doc = " The data is a fat binary (clang-offload-bundler output)."]
     pub const AMD_COMGR_DATA_KIND_FATBIN: amd_comgr_data_kind_s = amd_comgr_data_kind_s(16);
 }
+impl amd_comgr_data_kind_s {
+    #[doc = " The data is an archive."]
+    pub const AMD_COMGR_DATA_KIND_AR: amd_comgr_data_kind_s = amd_comgr_data_kind_s(17);
+}
+impl amd_comgr_data_kind_s {
+    #[doc = " The data is a bundled bitcode."]
+    pub const AMD_COMGR_DATA_KIND_BC_BUNDLE: amd_comgr_data_kind_s = amd_comgr_data_kind_s(18);
+}
+impl amd_comgr_data_kind_s {
+    #[doc = " The data is a bundled archive."]
+    pub const AMD_COMGR_DATA_KIND_AR_BUNDLE: amd_comgr_data_kind_s = amd_comgr_data_kind_s(19);
+}
 impl amd_comgr_data_kind_s {
     #[doc = " Marker for last valid data kind."]
-    pub const AMD_COMGR_DATA_KIND_LAST: amd_comgr_data_kind_s = amd_comgr_data_kind_s(16);
+    pub const AMD_COMGR_DATA_KIND_LAST: amd_comgr_data_kind_s = amd_comgr_data_kind_s(19);
 }
 #[repr(transparent)]
 #[doc = " @brief The kinds of data supported."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct amd_comgr_data_kind_s(pub ::std::os::raw::c_uint);
+pub struct amd_comgr_data_kind_s(pub ::std::os::raw::c_int);
 #[doc = " @brief The kinds of data supported."]
 pub use self::amd_comgr_data_kind_s as amd_comgr_data_kind_t;
-#[doc = " @brief A handle to a data object."]
-#[doc = ""]
-#[doc = " Data objects are used to hold the data which is either an input or"]
-#[doc = " output of a code object manager action."]
+#[doc = " @brief A handle to a data object.\n\n Data objects are used to hold the data which is either an input or\n output of a code object manager action."]
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct amd_comgr_data_s {
     pub handle: u64,
 }
-#[doc = " @brief A handle to a data object."]
-#[doc = ""]
-#[doc = " Data objects are used to hold the data which is either an input or"]
-#[doc = " output of a code object manager action."]
+#[doc = " @brief A handle to a data object.\n\n Data objects are used to hold the data which is either an input or\n output of a code object manager action."]
 pub type amd_comgr_data_t = amd_comgr_data_s;
-#[doc = " @brief A handle to an action data object."]
-#[doc = ""]
-#[doc = " An action data object holds a set of data objects. These can be"]
-#[doc = " used as inputs to an action, or produced as the result of an"]
-#[doc = " action."]
+#[doc = " @brief A handle to an action data object.\n\n An action data object holds a set of data objects. These can be\n used as inputs to an action, or produced as the result of an\n action."]
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct amd_comgr_data_set_s {
     pub handle: u64,
 }
-#[doc = " @brief A handle to an action data object."]
-#[doc = ""]
-#[doc = " An action data object holds a set of data objects. These can be"]
-#[doc = " used as inputs to an action, or produced as the result of an"]
-#[doc = " action."]
+#[doc = " @brief A handle to an action data object.\n\n An action data object holds a set of data objects. These can be\n used as inputs to an action, or produced as the result of an\n action."]
 pub type amd_comgr_data_set_t = amd_comgr_data_set_s;
-#[doc = " @brief A handle to an action information object."]
-#[doc = ""]
-#[doc = " An action information object holds all the necessary information,"]
-#[doc = " excluding the input data objects, required to perform an action."]
+#[doc = " @brief A handle to an action information object.\n\n An action information object holds all the necessary information,\n excluding the input data objects, required to perform an action."]
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct amd_comgr_action_info_s {
     pub handle: u64,
 }
-#[doc = " @brief A handle to an action information object."]
-#[doc = ""]
-#[doc = " An action information object holds all the necessary information,"]
-#[doc = " excluding the input data objects, required to perform an action."]
+#[doc = " @brief A handle to an action information object.\n\n An action information object holds all the necessary information,\n excluding the input data objects, required to perform an action."]
 pub type amd_comgr_action_info_t = amd_comgr_action_info_s;
 impl amd_comgr_action_kind_s {
-    #[doc = " Preprocess each source data object in @p input in order. For each"]
-    #[doc = " successful preprocessor invocation, add a source data object to @p result."]
-    #[doc = " Resolve any include source names using the names of include data objects"]
-    #[doc = " in @p input. Resolve any include relative path names using the working"]
-    #[doc = " directory path in @p info. Preprocess the source for the language in @p"]
-    #[doc = " info."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if any preprocessing fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name or language is not set in @p info."]
+    #[doc = " Preprocess each source data object in @p input in order. For each\n successful preprocessor invocation, add a source data object to @p result.\n Resolve any include source names using the names of include data objects\n in @p input. Resolve any include relative path names using the working\n directory path in @p info. Preprocess the source for the language in @p\n info.\n\n Return @p AMD_COMGR_STATUS_ERROR if any preprocessing fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info."]
     pub const AMD_COMGR_ACTION_SOURCE_TO_PREPROCESSOR: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(0);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Copy all existing data objects in @p input to @p output, then add the"]
-    #[doc = " device-specific and language-specific precompiled headers required for"]
-    #[doc = " compilation."]
-    #[doc = ""]
-    #[doc = " Currently the only supported languages are @p AMD_COMGR_LANGUAGE_OPENCL_1_2"]
-    #[doc = " and @p AMD_COMGR_LANGUAGE_OPENCL_2_0."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if isa name or language"]
-    #[doc = " is not set in @p info, or the language is not supported."]
+    #[doc = " Copy all existing data objects in @p input to @p output, then add the\n device-specific and language-specific precompiled headers required for\n compilation.\n\n Currently the only supported languages are @p AMD_COMGR_LANGUAGE_OPENCL_1_2\n and @p AMD_COMGR_LANGUAGE_OPENCL_2_0.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if isa name or language\n is not set in @p info, or the language is not supported."]
     pub const AMD_COMGR_ACTION_ADD_PRECOMPILED_HEADERS: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(1);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Compile each source data object in @p input in order. For each"]
-    #[doc = " successful compilation add a bc data object to @p result. Resolve"]
-    #[doc = " any include source names using the names of include data objects"]
-    #[doc = " in @p input. Resolve any include relative path names using the"]
-    #[doc = " working directory path in @p info. Produce bc for isa name in @p"]
-    #[doc = " info. Compile the source for the language in @p info."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if any compilation"]
-    #[doc = " fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name or language is not set in @p info."]
+    #[doc = " Compile each source data object in @p input in order. For each\n successful compilation add a bc data object to @p result. Resolve\n any include source names using the names of include data objects\n in @p input. Resolve any include relative path names using the\n working directory path in @p info. Produce bc for isa name in @p\n info. Compile the source for the language in @p info.\n\n Return @p AMD_COMGR_STATUS_ERROR if any compilation\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info."]
     pub const AMD_COMGR_ACTION_COMPILE_SOURCE_TO_BC: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(2);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Copy all existing data objects in @p input to @p output, then add the"]
-    #[doc = " device-specific and language-specific bitcode libraries required for"]
-    #[doc = " compilation."]
-    #[doc = ""]
-    #[doc = " Currently the only supported languages are @p AMD_COMGR_LANGUAGE_OPENCL_1_2,"]
-    #[doc = " @p AMD_COMGR_LANGUAGE_OPENCL_2_0, and @p AMD_COMGR_LANGUAGE_HIP."]
-    #[doc = ""]
-    #[doc = " The options in @p info should be set to a set of language-specific flags."]
-    #[doc = " For OpenCL and HIP these include:"]
-    #[doc = ""]
-    #[doc = "    correctly_rounded_sqrt"]
-    #[doc = "    daz_opt"]
-    #[doc = "    finite_only"]
-    #[doc = "    unsafe_math"]
-    #[doc = "    wavefrontsize64"]
-    #[doc = ""]
-    #[doc = " For example, to enable daz_opt and unsafe_math, the options should be set"]
-    #[doc = " as:"]
-    #[doc = ""]
-    #[doc = "    const char *options[] = {\"daz_opt, \"unsafe_math\"};"]
-    #[doc = "    size_t optionsCount = sizeof(options) / sizeof(options[0]);"]
-    #[doc = "    amd_comgr_action_info_set_option_list(info, options, optionsCount);"]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if isa name or language"]
-    #[doc = " is not set in @p info, the language is not supported, an unknown"]
-    #[doc = " language-specific flag is supplied, or a language-specific flag is"]
-    #[doc = " repeated."]
-    #[doc = ""]
-    #[doc = " @deprecated since 1.7"]
-    #[doc = " @warning This action, followed by @c AMD_COMGR_ACTION_LINK_BC_TO_BC, may"]
-    #[doc = " result in subtle bugs due to incorrect linking of the device libraries."]
-    #[doc = " The @c AMD_COMGR_ACTION_COMPILE_SOURCE_WITH_DEVICE_LIBS_TO_BC action can"]
-    #[doc = " be used as a workaround which ensures the link occurs correctly."]
+    #[doc = " Copy all existing data objects in @p input to @p output, then add the\n device-specific and language-specific bitcode libraries required for\n compilation.\n\n Currently the only supported languages are @p AMD_COMGR_LANGUAGE_OPENCL_1_2,\n @p AMD_COMGR_LANGUAGE_OPENCL_2_0, and @p AMD_COMGR_LANGUAGE_HIP.\n\n The options in @p info should be set to a set of language-specific flags.\n For OpenCL and HIP these include:\n\n    correctly_rounded_sqrt\n    daz_opt\n    finite_only\n    unsafe_math\n    wavefrontsize64\n\n For example, to enable daz_opt and unsafe_math, the options should be set\n as:\n\n    const char *options[] = {\"daz_opt\", \"unsafe_math\"};\n    size_t optionsCount = sizeof(options) / sizeof(options[0]);\n    amd_comgr_action_info_set_option_list(info, options, optionsCount);\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if isa name or language\n is not set in @p info, the language is not supported, an unknown\n language-specific flag is supplied, or a language-specific flag is\n repeated.\n\n @deprecated since 1.7\n @warning This action, followed by @c AMD_COMGR_ACTION_LINK_BC_TO_BC, may\n result in subtle bugs due to incorrect linking of the device libraries.\n The @c AMD_COMGR_ACTION_COMPILE_SOURCE_WITH_DEVICE_LIBS_TO_BC action can\n be used as a workaround which ensures the link occurs correctly."]
     pub const AMD_COMGR_ACTION_ADD_DEVICE_LIBRARIES: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(3);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Link each bc data object in @p input together and add the linked"]
-    #[doc = " bc data object to @p result. Any device library bc data object"]
-    #[doc = " must be explicitly added to @p input if needed."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if the link fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name is not set in @p info and does not match the isa name"]
-    #[doc = " of all bc data objects in @p input."]
+    #[doc = " Link a collection of bitcodes, bundled bitcodes, and bundled bitcode\n archives in @p input into a single composite (unbundled) bitcode in @p result.\n Any device library bc data object must be explicitly added to @p input if\n needed.\n\n Return @p AMD_COMGR_STATUS_ERROR if the link or unbundling fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if IsaName is not set in @p info and does not match the isa name\n of all bc data objects in @p input, or if the Name field is not set for\n any DataObject in the input set."]
     pub const AMD_COMGR_ACTION_LINK_BC_TO_BC: amd_comgr_action_kind_s = amd_comgr_action_kind_s(4);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Optimize each bc data object in @p input and create an optimized bc data"]
-    #[doc = " object to @p result."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if the optimization fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name is not set in @p info and does not match the isa name"]
-    #[doc = " of all bc data objects in @p input."]
+    #[doc = " Optimize each bc data object in @p input and create an optimized bc data\n object to @p result.\n\n Return @p AMD_COMGR_STATUS_ERROR if the optimization fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all bc data objects in @p input."]
     pub const AMD_COMGR_ACTION_OPTIMIZE_BC_TO_BC: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(5);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Perform code generation for each bc data object in @p input in"]
-    #[doc = " order. For each successful code generation add a relocatable data"]
-    #[doc = " object to @p result."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if any code"]
-    #[doc = " generation fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name is not set in @p info and does not match the isa name"]
-    #[doc = " of all bc data objects in @p input."]
+    #[doc = " Perform code generation for each bc data object in @p input in\n order. For each successful code generation add a relocatable data\n object to @p result.\n\n Return @p AMD_COMGR_STATUS_ERROR if any code\n generation fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all bc data objects in @p input."]
     pub const AMD_COMGR_ACTION_CODEGEN_BC_TO_RELOCATABLE: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(6);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Perform code generation for each bc data object in @p input in"]
-    #[doc = " order. For each successful code generation add an assembly source data"]
-    #[doc = " object to @p result."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if any code"]
-    #[doc = " generation fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name is not set in @p info and does not match the isa name"]
-    #[doc = " of all bc data objects in @p input."]
+    #[doc = " Perform code generation for each bc data object in @p input in\n order. For each successful code generation add an assembly source data\n object to @p result.\n\n Return @p AMD_COMGR_STATUS_ERROR if any code\n generation fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all bc data objects in @p input."]
     pub const AMD_COMGR_ACTION_CODEGEN_BC_TO_ASSEMBLY: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(7);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Link each relocatable data object in @p input together and add"]
-    #[doc = " the linked relocatable data object to @p result. Any device"]
-    #[doc = " library relocatable data object must be explicitly added to @p"]
-    #[doc = " input if needed."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if the link fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name is not set in @p info and does not match the isa name"]
-    #[doc = " of all relocatable data objects in @p input."]
+    #[doc = " Link each relocatable data object in @p input together and add\n the linked relocatable data object to @p result. Any device\n library relocatable data object must be explicitly added to @p\n input if needed.\n\n Return @p AMD_COMGR_STATUS_ERROR if the link fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all relocatable data objects in @p input."]
     pub const AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_RELOCATABLE: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(8);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Link each relocatable data object in @p input together and add"]
-    #[doc = " the linked executable data object to @p result. Any device"]
-    #[doc = " library relocatable data object must be explicitly added to @p"]
-    #[doc = " input if needed."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if the link fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name is not set in @p info and does not match the isa name"]
-    #[doc = " of all relocatable data objects in @p input."]
+    #[doc = " Link each relocatable data object in @p input together and add\n the linked executable data object to @p result. Any device\n library relocatable data object must be explicitly added to @p\n input if needed.\n\n Return @p AMD_COMGR_STATUS_ERROR if the link fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all relocatable data objects in @p input."]
     pub const AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(9);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Assemble each source data object in @p input in order into machine code."]
-    #[doc = " For each successful assembly add a relocatable data object to @p result."]
-    #[doc = " Resolve any include source names using the names of include data objects in"]
-    #[doc = " @p input. Resolve any include relative path names using the working"]
-    #[doc = " directory path in @p info. Produce relocatable for isa name in @p info."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if any assembly fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if isa name is not set in"]
-    #[doc = " @p info."]
+    #[doc = " Assemble each source data object in @p input in order into machine code.\n For each successful assembly add a relocatable data object to @p result.\n Resolve any include source names using the names of include data objects in\n @p input. Resolve any include relative path names using the working\n directory path in @p info. Produce relocatable for isa name in @p info.\n\n Return @p AMD_COMGR_STATUS_ERROR if any assembly fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if isa name is not set in\n @p info."]
     pub const AMD_COMGR_ACTION_ASSEMBLE_SOURCE_TO_RELOCATABLE: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(10);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Disassemble each relocatable data object in @p input in"]
-    #[doc = " order. For each successful disassembly add a source data object to"]
-    #[doc = " @p result."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if any disassembly"]
-    #[doc = " fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name is not set in @p info and does not match the isa name"]
-    #[doc = " of all relocatable data objects in @p input."]
+    #[doc = " Disassemble each relocatable data object in @p input in\n order. For each successful disassembly add a source data object to\n @p result.\n\n Return @p AMD_COMGR_STATUS_ERROR if any disassembly\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all relocatable data objects in @p input."]
     pub const AMD_COMGR_ACTION_DISASSEMBLE_RELOCATABLE_TO_SOURCE: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(11);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Disassemble each executable data object in @p input in order. For"]
-    #[doc = " each successful disassembly add a source data object to @p result."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if any disassembly"]
-    #[doc = " fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name is not set in @p info and does not match the isa name"]
-    #[doc = " of all relocatable data objects in @p input."]
+    #[doc = " Disassemble each executable data object in @p input in order. For\n each successful disassembly add a source data object to @p result.\n\n Return @p AMD_COMGR_STATUS_ERROR if any disassembly\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all relocatable data objects in @p input."]
     pub const AMD_COMGR_ACTION_DISASSEMBLE_EXECUTABLE_TO_SOURCE: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(12);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Disassemble each bytes data object in @p input in order. For each"]
-    #[doc = " successful disassembly add a source data object to @p"]
-    #[doc = " result. Only simple assembly language commands are generate that"]
-    #[doc = " corresponf to raw bytes are supported, not any directives that"]
-    #[doc = " control the code object layout, or symbolic branch targets or"]
-    #[doc = " names."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if any disassembly"]
-    #[doc = " fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name is not set in @p info"]
+    #[doc = " Disassemble each bytes data object in @p input in order. For each\n successful disassembly add a source data object to @p\n result. Only simple assembly language commands that\n correspond to raw bytes are generated, not any directives that\n control the code object layout, or symbolic branch targets or\n names.\n\n Return @p AMD_COMGR_STATUS_ERROR if any disassembly\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info"]
     pub const AMD_COMGR_ACTION_DISASSEMBLE_BYTES_TO_SOURCE: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(13);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Compile each source data object in @p input in order. For each"]
-    #[doc = " successful compilation add a fat binary to @p result. Resolve"]
-    #[doc = " any include source names using the names of include data objects"]
-    #[doc = " in @p input. Resolve any include relative path names using the"]
-    #[doc = " working directory path in @p info. Produce fat binary for isa name in @p"]
-    #[doc = " info. Compile the source for the language in @p info."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if any compilation"]
-    #[doc = " fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name or language is not set in @p info."]
+    #[doc = " Compile each source data object in @p input in order. For each\n successful compilation add a fat binary to @p result. Resolve\n any include source names using the names of include data objects\n in @p input. Resolve any include relative path names using the\n working directory path in @p info. Produce fat binary for isa name in @p\n info. Compile the source for the language in @p info.\n\n Return @p AMD_COMGR_STATUS_ERROR if any compilation\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info.\n\n @deprecated since 2.5\n @see in-process compilation via AMD_COMGR_ACTION_COMPILE_SOURCE_TO_BC, etc.\n instead"]
     pub const AMD_COMGR_ACTION_COMPILE_SOURCE_TO_FATBIN: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(14);
 }
 impl amd_comgr_action_kind_s {
-    #[doc = " Compile each source data object in @p input in order. For each"]
-    #[doc = " successful compilation add a bc data object to @p result. Resolve"]
-    #[doc = " any include source names using the names of include data objects"]
-    #[doc = " in @p input. Resolve any include relative path names using the"]
-    #[doc = " working directory path in @p info. Produce bc for isa name in @p"]
-    #[doc = " info. Compile the source for the language in @p info. Link against"]
-    #[doc = " the device-specific and language-specific bitcode device libraries"]
-    #[doc = " required for compilation."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR if any compilation"]
-    #[doc = " fails."]
-    #[doc = ""]
-    #[doc = " Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT"]
-    #[doc = " if isa name or language is not set in @p info."]
+    #[doc = " Compile each source data object in @p input in order. For each\n successful compilation add a bc data object to @p result. Resolve\n any include source names using the names of include data objects\n in @p input. Resolve any include relative path names using the\n working directory path in @p info. Produce bc for isa name in @p\n info. Compile the source for the language in @p info. Link against\n the device-specific and language-specific bitcode device libraries\n required for compilation.\n\n Return @p AMD_COMGR_STATUS_ERROR if any compilation\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info."]
     pub const AMD_COMGR_ACTION_COMPILE_SOURCE_WITH_DEVICE_LIBS_TO_BC: amd_comgr_action_kind_s =
         amd_comgr_action_kind_s(15);
 }
+impl amd_comgr_action_kind_s {
+    #[doc = " Compile a single source data object in @p input in order. For each\n successful compilation add a relocatable data object to @p result.\n Resolve any include source names using the names of include data objects\n in @p input. Resolve any include relative path names using the\n working directory path in @p info. Produce relocatable for isa name in @p\n info. Compile the source for the language in @p info. Link against\n the device-specific and language-specific bitcode device libraries\n required for compilation. Currently only supports HIP language.\n\n Return @p AMD_COMGR_STATUS_ERROR if any compilation\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info."]
+    pub const AMD_COMGR_ACTION_COMPILE_SOURCE_TO_RELOCATABLE: amd_comgr_action_kind_s =
+        amd_comgr_action_kind_s(16);
+}
+impl amd_comgr_action_kind_s {
+    #[doc = " Compile each source data object in @p input and create a single executable\n in @p result. Resolve any include source names using the names of include\n data objects in @p input. Resolve any include relative path names using the\n working directory path in @p info. Produce executable for isa name in @p\n info. Compile the source for the language in @p info. Link against\n the device-specific and language-specific bitcode device libraries\n required for compilation.\n\n Return @p AMD_COMGR_STATUS_ERROR if any compilation\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info."]
+    pub const AMD_COMGR_ACTION_COMPILE_SOURCE_TO_EXECUTABLE: amd_comgr_action_kind_s =
+        amd_comgr_action_kind_s(17);
+}
 impl amd_comgr_action_kind_s {
     #[doc = " Marker for last valid action kind."]
-    pub const AMD_COMGR_ACTION_LAST: amd_comgr_action_kind_s = amd_comgr_action_kind_s(15);
+    pub const AMD_COMGR_ACTION_LAST: amd_comgr_action_kind_s = amd_comgr_action_kind_s(17);
 }
 #[repr(transparent)]
 #[doc = " @brief The kinds of actions that can be performed."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct amd_comgr_action_kind_s(pub ::std::os::raw::c_uint);
+pub struct amd_comgr_action_kind_s(pub ::std::os::raw::c_int);
 #[doc = " @brief The kinds of actions that can be performed."]
 pub use self::amd_comgr_action_kind_s as amd_comgr_action_kind_t;
 extern crate libloading;
@@ -440,11 +268,6 @@ pub struct LibComgr {
         size: *mut usize,
         bytes: *mut ::std::os::raw::c_char,
     ) -> amd_comgr_status_t,
-    pub amd_comgr_get_data_name: unsafe extern "C" fn(
-        data: amd_comgr_data_t,
-        size: *mut usize,
-        name: *mut ::std::os::raw::c_char,
-    ) -> amd_comgr_status_t,
     pub amd_comgr_create_data_set:
         unsafe extern "C" fn(data_set: *mut amd_comgr_data_set_t) -> amd_comgr_status_t,
     pub amd_comgr_destroy_data_set:
@@ -503,9 +326,6 @@ impl LibComgr {
             .get(b"amd_comgr_set_data_name\0")
             .map(|sym| *sym)?;
         let amd_comgr_get_data = __library.get(b"amd_comgr_get_data\0").map(|sym| *sym)?;
-        let amd_comgr_get_data_name = __library
-            .get(b"amd_comgr_get_data_name\0")
-            .map(|sym| *sym)?;
         let amd_comgr_create_data_set = __library
             .get(b"amd_comgr_create_data_set\0")
             .map(|sym| *sym)?;
@@ -539,7 +359,6 @@ impl LibComgr {
             amd_comgr_set_data,
             amd_comgr_set_data_name,
             amd_comgr_get_data,
-            amd_comgr_get_data_name,
             amd_comgr_create_data_set,
             amd_comgr_destroy_data_set,
             amd_comgr_data_set_add,
@@ -553,27 +372,7 @@ impl LibComgr {
         })
     }
     #[must_use]
-    #[doc = " @brief Create a data object that can hold data of a specified kind."]
-    #[doc = ""]
-    #[doc = " Data objects are reference counted and are destroyed when the"]
-    #[doc = " reference count reaches 0. When a data object is created its"]
-    #[doc = " reference count is 1, it has 0 bytes of data, it has an empty name,"]
-    #[doc = " and it has no metadata."]
-    #[doc = ""]
-    #[doc = " @param[in] kind The kind of data the object is intended to hold."]
-    #[doc = ""]
-    #[doc = " @param[out] data A handle to the data object created. Its reference"]
-    #[doc = " count is set to 1."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " kind is an invalid data kind, or @p"]
-    #[doc = " AMD_COMGR_DATA_KIND_UNDEF. @p data is NULL."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to create the data object as out of resources."]
+    #[doc = " @brief Create a data object that can hold data of a specified kind.\n\n Data objects are reference counted and are destroyed when the\n reference count reaches 0. When a data object is created its\n reference count is 1, it has 0 bytes of data, it has an empty name,\n and it has no metadata.\n\n @param[in] kind The kind of data the object is intended to hold.\n\n @param[out] data A handle to the data object created. Its reference\n count is set to 1.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n kind is an invalid data kind, or @p\n AMD_COMGR_DATA_KIND_UNDEF. @p data is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to create the data object as out of resources."]
     pub unsafe fn amd_comgr_create_data(
         &self,
         kind: amd_comgr_data_kind_t,
@@ -582,49 +381,12 @@ impl LibComgr {
         (self.amd_comgr_create_data)(kind, data)
     }
     #[must_use]
-    #[doc = " @brief Indicate that no longer using a data object handle."]
-    #[doc = ""]
-    #[doc = " The reference count of the associated data object is"]
-    #[doc = " decremented. If it reaches 0 it is destroyed."]
-    #[doc = ""]
-    #[doc = " @param[in] data The data object to release."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " data is an invalid data object, or has kind @p"]
-    #[doc = " AMD_COMGR_DATA_KIND_UNDEF."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to update the data object as out of resources."]
+    #[doc = " @brief Indicate that no longer using a data object handle.\n\n The reference count of the associated data object is\n decremented. If it reaches 0 it is destroyed.\n\n @param[in] data The data object to release.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, or has kind @p\n AMD_COMGR_DATA_KIND_UNDEF.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
     pub unsafe fn amd_comgr_release_data(&self, data: amd_comgr_data_t) -> amd_comgr_status_t {
         (self.amd_comgr_release_data)(data)
     }
     #[must_use]
-    #[doc = " @brief Set the data content of a data object to the specified"]
-    #[doc = " bytes."]
-    #[doc = ""]
-    #[doc = " Any previous value of the data object is overwritten. Any metadata"]
-    #[doc = " associated with the data object is also replaced which invalidates"]
-    #[doc = " all metadata handles to the old metadata."]
-    #[doc = ""]
-    #[doc = " @param[in] data The data object to update."]
-    #[doc = ""]
-    #[doc = " @param[in] size The number of bytes in the data specified by @p bytes."]
-    #[doc = ""]
-    #[doc = " @param[in] bytes The bytes to set the data object to. The bytes are"]
-    #[doc = " copied into the data object and can be freed after the call."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " data is an invalid data object, or has kind @p"]
-    #[doc = " AMD_COMGR_DATA_KIND_UNDEF."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to update the data object as out of resources."]
+    #[doc = " @brief Set the data content of a data object to the specified\n bytes.\n\n Any previous value of the data object is overwritten. Any metadata\n associated with the data object is also replaced which invalidates\n all metadata handles to the old metadata.\n\n @param[in] data The data object to update.\n\n @param[in] size The number of bytes in the data specified by @p bytes.\n\n @param[in] bytes The bytes to set the data object to. The bytes are\n copied into the data object and can be freed after the call.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, or has kind @p\n AMD_COMGR_DATA_KIND_UNDEF.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
     pub unsafe fn amd_comgr_set_data(
         &self,
         data: amd_comgr_data_t,
@@ -634,28 +396,7 @@ impl LibComgr {
         (self.amd_comgr_set_data)(data, size, bytes)
     }
     #[must_use]
-    #[doc = " @brief Set the name associated with a data object."]
-    #[doc = ""]
-    #[doc = " When compiling, the fle name of an include directive is used to"]
-    #[doc = " reference the contents of the include data object with the same"]
-    #[doc = " name. The name may also be used for other data objects in log and"]
-    #[doc = " diagnostic output."]
-    #[doc = ""]
-    #[doc = " @param[in] data The data object to update."]
-    #[doc = ""]
-    #[doc = " @param[in] name A null terminated string that specifies the name to"]
-    #[doc = " use for the data object. If NULL then the name is set to the empty"]
-    #[doc = " string."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " data is an invalid data object, or has kind @p"]
-    #[doc = " AMD_COMGR_DATA_KIND_UNDEF."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to update the data object as out of resources."]
+    #[doc = " @brief Set the name associated with a data object.\n\n When compiling, the full name of an include directive is used to\n reference the contents of the include data object with the same\n name. The name may also be used for other data objects in log and\n diagnostic output.\n\n @param[in] data The data object to update.\n\n @param[in] name A null terminated string that specifies the name to\n use for the data object. If NULL then the name is set to the empty\n string.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, or has kind @p\n AMD_COMGR_DATA_KIND_UNDEF.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
     pub unsafe fn amd_comgr_set_data_name(
         &self,
         data: amd_comgr_data_t,
@@ -664,28 +405,7 @@ impl LibComgr {
         (self.amd_comgr_set_data_name)(data, name)
     }
     #[must_use]
-    #[doc = " @brief Get the data contents, and/or the size of the data"]
-    #[doc = " associated with a data object."]
-    #[doc = ""]
-    #[doc = " @param[in] data The data object to query."]
-    #[doc = ""]
-    #[doc = " @param[in, out] size On entry, the size of @p bytes. On return, if @p bytes"]
-    #[doc = " is NULL, set to the size of the data object contents."]
-    #[doc = ""]
-    #[doc = " @param[out] bytes If not NULL, then the first @p size bytes of the"]
-    #[doc = " data object contents is copied. If NULL, no data is copied, and"]
-    #[doc = " only @p size is updated (useful in order to find the size of buffer"]
-    #[doc = " required to copy the data)."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " data is an invalid data object, or has kind @p"]
-    #[doc = " AMD_COMGR_DATA_KIND_UNDEF. @p size is NULL."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to update the data object as out of resources."]
+    #[doc = " @brief Get the data contents, and/or the size of the data\n associated with a data object.\n\n @param[in] data The data object to query.\n\n @param[in, out] size On entry, the size of @p bytes. On return, if @p bytes\n is NULL, set to the size of the data object contents.\n\n @param[out] bytes If not NULL, then the first @p size bytes of the\n data object contents is copied. If NULL, no data is copied, and\n only @p size is updated (useful in order to find the size of buffer\n required to copy the data).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, or has kind @p\n AMD_COMGR_DATA_KIND_UNDEF. @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
     pub unsafe fn amd_comgr_get_data(
         &self,
         data: amd_comgr_data_t,
@@ -695,47 +415,7 @@ impl LibComgr {
         (self.amd_comgr_get_data)(data, size, bytes)
     }
     #[must_use]
-    #[doc = " @brief Get the data object name and/or name length."]
-    #[doc = ""]
-    #[doc = " @param[in] data The data object to query."]
-    #[doc = ""]
-    #[doc = " @param[in, out] size On entry, the size of @p name. On return, the size of"]
-    #[doc = " the data object name including the terminating null character."]
-    #[doc = ""]
-    #[doc = " @param[out] name If not NULL, then the first @p size characters of the"]
-    #[doc = " data object name are copied. If @p name is NULL, only @p size is updated"]
-    #[doc = " (useful in order to find the size of buffer required to copy the name)."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " data is an invalid data object, or has kind @p"]
-    #[doc = " AMD_COMGR_DATA_KIND_UNDEF. @p size is NULL."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to update the data object as out of resources."]
-    pub unsafe fn amd_comgr_get_data_name(
-        &self,
-        data: amd_comgr_data_t,
-        size: *mut usize,
-        name: *mut ::std::os::raw::c_char,
-    ) -> amd_comgr_status_t {
-        (self.amd_comgr_get_data_name)(data, size, name)
-    }
-    #[must_use]
-    #[doc = " @brief Create a data set object."]
-    #[doc = ""]
-    #[doc = " @param[out] data_set A handle to the data set created. Initially it"]
-    #[doc = " contains no data objects."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed"]
-    #[doc = " successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is NULL."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to create the data"]
-    #[doc = " set object as out of resources."]
+    #[doc = " @brief Create a data set object.\n\n @param[out] data_set A handle to the data set created. Initially it\n contains no data objects.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to create the data\n set object as out of resources."]
     pub unsafe fn amd_comgr_create_data_set(
         &self,
         data_set: *mut amd_comgr_data_set_t,
@@ -743,21 +423,7 @@ impl LibComgr {
         (self.amd_comgr_create_data_set)(data_set)
     }
     #[must_use]
-    #[doc = " @brief Destroy a data set object."]
-    #[doc = ""]
-    #[doc = " The reference counts of any associated data objects are decremented. Any"]
-    #[doc = " handles to the data set object become invalid."]
-    #[doc = ""]
-    #[doc = " @param[in] data_set A handle to the data set object to destroy."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed"]
-    #[doc = " successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid"]
-    #[doc = " data set object."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update data set"]
-    #[doc = " object as out of resources."]
+    #[doc = " @brief Destroy a data set object.\n\n The reference counts of any associated data objects are decremented. Any\n handles to the data set object become invalid.\n\n @param[in] data_set A handle to the data set object to destroy.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid\n data set object.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update data set\n object as out of resources."]
     pub unsafe fn amd_comgr_destroy_data_set(
         &self,
         data_set: amd_comgr_data_set_t,
@@ -765,25 +431,7 @@ impl LibComgr {
         (self.amd_comgr_destroy_data_set)(data_set)
     }
     #[must_use]
-    #[doc = " @brief Add a data object to a data set object if it is not already added."]
-    #[doc = ""]
-    #[doc = " The reference count of the data object is incremented."]
-    #[doc = ""]
-    #[doc = " @param[in] data_set A handle to the data set object to be updated."]
-    #[doc = ""]
-    #[doc = " @param[in] data A handle to the data object to be added. If @p data_set"]
-    #[doc = " already has the specified handle present, then it is not added. The order"]
-    #[doc = " that data objects are added is preserved."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed"]
-    #[doc = " successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid"]
-    #[doc = " data set object. @p data is an invalid data object; has undef kind; has"]
-    #[doc = " include kind but does not have a name."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update data set"]
-    #[doc = " object as out of resources."]
+    #[doc = " @brief Add a data object to a data set object if it is not already added.\n\n The reference count of the data object is incremented.\n\n @param[in] data_set A handle to the data set object to be updated.\n\n @param[in] data A handle to the data object to be added. If @p data_set\n already has the specified handle present, then it is not added. The order\n that data objects are added is preserved.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid\n data set object. @p data is an invalid data object; has undef kind; has\n include kind but does not have a name.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update data set\n object as out of resources."]
     pub unsafe fn amd_comgr_data_set_add(
         &self,
         data_set: amd_comgr_data_set_t,
@@ -792,31 +440,7 @@ impl LibComgr {
         (self.amd_comgr_data_set_add)(data_set, data)
     }
     #[must_use]
-    #[doc = " @brief Return the Nth data object of a specified data kind that is added to a"]
-    #[doc = " data set object."]
-    #[doc = ""]
-    #[doc = " The reference count of the returned data object is incremented."]
-    #[doc = ""]
-    #[doc = " @param[in] data_set A handle to the data set object to be queried."]
-    #[doc = ""]
-    #[doc = " @param[in] data_kind The data kind of the data object to be returned."]
-    #[doc = ""]
-    #[doc = " @param[in] index The index of the data object of data kind @data_kind to be"]
-    #[doc = " returned. The first data object is index 0. The order of data objects matches"]
-    #[doc = " the order that they were added to the data set object."]
-    #[doc = ""]
-    #[doc = " @param[out] data The data object being requested."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed"]
-    #[doc = " successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid"]
-    #[doc = " data set object. @p data_kind is an invalid data kind or @p"]
-    #[doc = " AMD_COMGR_DATA_KIND_UNDEF. @p index is greater than the number of data"]
-    #[doc = " objects of kind @p data_kind. @p data is NULL."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to query data set"]
-    #[doc = " object as out of resources."]
+    #[doc = " @brief Return the Nth data object of a specified data kind that is added to a\n data set object.\n\n The reference count of the returned data object is incremented.\n\n @param[in] data_set A handle to the data set object to be queried.\n\n @param[in] data_kind The data kind of the data object to be returned.\n\n @param[in] index The index of the data object of data kind @p data_kind to be\n returned. The first data object is index 0. The order of data objects matches\n the order that they were added to the data set object.\n\n @param[out] data The data object being requested.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid\n data set object. @p data_kind is an invalid data kind or @p\n AMD_COMGR_DATA_KIND_UNDEF. @p index is greater than the number of data\n objects of kind @p data_kind. @p data is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to query data set\n object as out of resources."]
     pub unsafe fn amd_comgr_action_data_get_data(
         &self,
         data_set: amd_comgr_data_set_t,
@@ -827,18 +451,7 @@ impl LibComgr {
         (self.amd_comgr_action_data_get_data)(data_set, data_kind, index, data)
     }
     #[must_use]
-    #[doc = " @brief Create an action info object."]
-    #[doc = ""]
-    #[doc = " @param[out] action_info A handle to the action info object created."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " action_info is NULL."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to create the action info object as out of resources."]
+    #[doc = " @brief Create an action info object.\n\n @param[out] action_info A handle to the action info object created.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to create the action info object as out of resources."]
     pub unsafe fn amd_comgr_create_action_info(
         &self,
         action_info: *mut amd_comgr_action_info_t,
@@ -846,18 +459,7 @@ impl LibComgr {
         (self.amd_comgr_create_action_info)(action_info)
     }
     #[must_use]
-    #[doc = " @brief Destroy an action info object."]
-    #[doc = ""]
-    #[doc = " @param[in] action_info A handle to the action info object to destroy."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " action_info is an invalid action info object."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to update action info object as out of resources."]
+    #[doc = " @brief Destroy an action info object.\n\n @param[in] action_info A handle to the action info object to destroy.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources."]
     pub unsafe fn amd_comgr_destroy_action_info(
         &self,
         action_info: amd_comgr_action_info_t,
@@ -865,30 +467,7 @@ impl LibComgr {
         (self.amd_comgr_destroy_action_info)(action_info)
     }
     #[must_use]
-    #[doc = " @brief Set the isa name of an action info object."]
-    #[doc = ""]
-    #[doc = " When an action info object is created it has no isa name. Some"]
-    #[doc = " actions require that the action info object has an isa name"]
-    #[doc = " defined."]
-    #[doc = ""]
-    #[doc = " @param[in] action_info A handle to the action info object to be"]
-    #[doc = " updated."]
-    #[doc = ""]
-    #[doc = " @param[in] isa_name A null terminated string that is the isa name. If NULL"]
-    #[doc = " or the empty string then the isa name is cleared. The isa name is defined as"]
-    #[doc = " the Code Object Target Identification string, described at"]
-    #[doc = " https://llvm.org/docs/AMDGPUUsage.html#code-object-target-identification"]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " action_info is an invalid action info object. @p isa_name is not an"]
-    #[doc = " isa name supported by this version of the code object manager"]
-    #[doc = " library."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to update action info object as out of resources."]
+    #[doc = " @brief Set the isa name of an action info object.\n\n When an action info object is created it has no isa name. Some\n actions require that the action info object has an isa name\n defined.\n\n @param[in] action_info A handle to the action info object to be\n updated.\n\n @param[in] isa_name A null terminated string that is the isa name. If NULL\n or the empty string then the isa name is cleared. The isa name is defined as\n the Code Object Target Identification string, described at\n https://llvm.org/docs/AMDGPUUsage.html#code-object-target-identification\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object. @p isa_name is not an\n isa name supported by this version of the code object manager\n library.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources."]
     pub unsafe fn amd_comgr_action_info_set_isa_name(
         &self,
         action_info: amd_comgr_action_info_t,
@@ -897,28 +476,7 @@ impl LibComgr {
         (self.amd_comgr_action_info_set_isa_name)(action_info, isa_name)
     }
     #[must_use]
-    #[doc = " @brief Set the source language of an action info object."]
-    #[doc = ""]
-    #[doc = " When an action info object is created it has no language defined"]
-    #[doc = " which is represented by @p"]
-    #[doc = " AMD_COMGR_LANGUAGE_NONE. Some actions require that"]
-    #[doc = " the action info object has a source language defined."]
-    #[doc = ""]
-    #[doc = " @param[in] action_info A handle to the action info object to be"]
-    #[doc = " updated."]
-    #[doc = ""]
-    #[doc = " @param[in] language The language to set. If @p"]
-    #[doc = " AMD_COMGR_LANGUAGE_NONE then the language is cleared."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " action_info is an invalid action info object. @p language is an"]
-    #[doc = " invalid language."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to update action info object as out of resources."]
+    #[doc = " @brief Set the source language of an action info object.\n\n When an action info object is created it has no language defined\n which is represented by @p\n AMD_COMGR_LANGUAGE_NONE. Some actions require that\n the action info object has a source language defined.\n\n @param[in] action_info A handle to the action info object to be\n updated.\n\n @param[in] language The language to set. If @p\n AMD_COMGR_LANGUAGE_NONE then the language is cleared.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object. @p language is an\n invalid language.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources."]
     pub unsafe fn amd_comgr_action_info_set_language(
         &self,
         action_info: amd_comgr_action_info_t,
@@ -927,31 +485,7 @@ impl LibComgr {
         (self.amd_comgr_action_info_set_language)(action_info, language)
     }
     #[must_use]
-    #[doc = " @brief Set the options array of an action info object."]
-    #[doc = ""]
-    #[doc = " This overrides any option strings or arrays previously set by calls to this"]
-    #[doc = " function or @p amd_comgr_action_info_set_options."]
-    #[doc = ""]
-    #[doc = " An @p action_info object which had its options set with this function can"]
-    #[doc = " only have its option inspected with @p"]
-    #[doc = " amd_comgr_action_info_get_option_list_count and @p"]
-    #[doc = " amd_comgr_action_info_get_option_list_item."]
-    #[doc = ""]
-    #[doc = " @param[in] action_info A handle to the action info object to be updated."]
-    #[doc = ""]
-    #[doc = " @param[in] options An array of null terminated strings. May be NULL if @p"]
-    #[doc = " count is zero, which will result in an empty options array."]
-    #[doc = ""]
-    #[doc = " @param[in] count The number of null terminated strings in @p options."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed"]
-    #[doc = " successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p action_info is an"]
-    #[doc = " invalid action info object, or @p options is NULL and @p count is non-zero."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update action"]
-    #[doc = " info object as out of resources."]
+    #[doc = " @brief Set the options array of an action info object.\n\n This overrides any option strings or arrays previously set by calls to this\n function or @p amd_comgr_action_info_set_options.\n\n An @p action_info object which had its options set with this function can\n only have its option inspected with @p\n amd_comgr_action_info_get_option_list_count and @p\n amd_comgr_action_info_get_option_list_item.\n\n @param[in] action_info A handle to the action info object to be updated.\n\n @param[in] options An array of null terminated strings. May be NULL if @p\n count is zero, which will result in an empty options array.\n\n @param[in] count The number of null terminated strings in @p options.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p action_info is an\n invalid action info object, or @p options is NULL and @p count is non-zero.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update action\n info object as out of resources."]
     pub unsafe fn amd_comgr_action_info_set_option_list(
         &self,
         action_info: amd_comgr_action_info_t,
@@ -961,37 +495,7 @@ impl LibComgr {
         (self.amd_comgr_action_info_set_option_list)(action_info, options, count)
     }
     #[must_use]
-    #[doc = " @brief Perform an action."]
-    #[doc = ""]
-    #[doc = " Each action ignores any data objects in @p input that it does not"]
-    #[doc = " use. If logging is enabled in @info then @p result will have a log"]
-    #[doc = " data object added. Any diagnostic data objects produced by the"]
-    #[doc = " action will be added to @p result. See the description of each"]
-    #[doc = " action in @p amd_comgr_action_kind_t."]
-    #[doc = ""]
-    #[doc = " @param[in] kind The action to perform."]
-    #[doc = ""]
-    #[doc = " @param[in] info The action info to use when performing the action."]
-    #[doc = ""]
-    #[doc = " @param[in] input The input data objects to the @p kind action."]
-    #[doc = ""]
-    #[doc = " @param[out] result Any data objects are removed before performing"]
-    #[doc = " the action which then adds all data objects produced by the action."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_SUCCESS The function has"]
-    #[doc = " been executed successfully."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR An error was"]
-    #[doc = " reported when executing the action."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p"]
-    #[doc = " kind is an invalid action kind. @p input_data or @p result_data are"]
-    #[doc = " invalid action data object handles. See the description of each"]
-    #[doc = " action in @p amd_comgr_action_kind_t for other"]
-    #[doc = " conditions that result in this status."]
-    #[doc = ""]
-    #[doc = " @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES"]
-    #[doc = " Unable to update the data object as out of resources."]
+    #[doc = " @brief Perform an action.\n\n Each action ignores any data objects in @p input that it does not\n use. If logging is enabled in @info then @p result will have a log\n data object added. Any diagnostic data objects produced by the\n action will be added to @p result. See the description of each\n action in @p amd_comgr_action_kind_t.\n\n @param[in] kind The action to perform.\n\n @param[in] info The action info to use when performing the action.\n\n @param[in] input The input data objects to the @p kind action.\n\n @param[out] result Any data objects are removed before performing\n the action which then adds all data objects produced by the action.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR An error was\n reported when executing the action.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n kind is an invalid action kind. @p input_data or @p result_data are\n invalid action data object handles. See the description of each\n action in @p amd_comgr_action_kind_t for other\n conditions that result in this status.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
     pub unsafe fn amd_comgr_do_action(
         &self,
         kind: amd_comgr_action_kind_t,
diff --git a/comgr/src/lib.rs b/comgr/src/lib.rs
index 9e188f2..001fc6d 100644
--- a/comgr/src/lib.rs
+++ b/comgr/src/lib.rs
@@ -60,7 +60,7 @@ impl Comgr {
 
     #[cfg(windows)]
     unsafe fn load_library() -> std::result::Result<LibComgr, libloading::Error> {
-        LibComgr::new("amd_comgr.dll")
+        LibComgr::new("amd_comgr_2.dll")
     }
 
     #[cfg(not(windows))]
diff --git a/hip_common/src/lib.rs b/hip_common/src/lib.rs
index 356bdf9..4a10e3b 100644
--- a/hip_common/src/lib.rs
+++ b/hip_common/src/lib.rs
@@ -76,7 +76,7 @@ pub enum CompilationMode {
 impl CompilationMode {
     pub unsafe fn from_device(device: i32) -> Result<CompilationMode, hipError_t> {
         let mut device_props = mem::zeroed();
-        hip! { hipGetDeviceProperties(&mut device_props, device) };
+        hip! { hipGetDevicePropertiesR0600(&mut device_props, device) };
         if device_props.warpSize == 32 {
             Ok(CompilationMode::Wave32)
         } else {
@@ -96,7 +96,7 @@ impl CompilationMode {
 
 pub unsafe fn comgr_isa(device: i32) -> Result<CString, hipError_t> {
     let mut device_props = mem::zeroed();
-    hip! { hipGetDeviceProperties(&mut device_props, device) };
+    hip! { hipGetDevicePropertiesR0600(&mut device_props, device) };
     let gcn_arch = CStr::from_ptr(device_props.gcnArchName.as_ptr() as _);
     let mut arch_name = b"amdgcn-amd-amdhsa--".to_vec();
     arch_name.extend_from_slice(gcn_arch.to_bytes_with_nul());
diff --git a/hip_runtime-sys/README b/hip_runtime-sys/README
index 5fe5c6d..9e4f2f8 100644
--- a/hip_runtime-sys/README
+++ b/hip_runtime-sys/README
@@ -1 +1 @@
-bindgen include/hip_runtime_api.h -o src/hip_runtime_api.rs --no-layout-tests --size_t-is-usize --default-enum-style=newtype --whitelist-function "hip.*" --whitelist-type "hip.*" --no-derive-debug --must-use-type hipError_t --new-type-alias "^hipDeviceptr_t$" --whitelist-var "^hip.*$" -- -I/opt/rocm/include -D__HIP_PLATFORM_AMD__
+bindgen $Env:HIP_PATH/include/hip/hip_runtime_api.h -o src/hip_runtime_api.rs --no-layout-tests --default-enum-style=newtype --allowlist-function "hip.*" --allowlist-type "hip.*" --no-derive-debug --must-use-type hipError_t --new-type-alias "^hipDeviceptr_t$" --allowlist-var "^hip.*$" -- -I"$Env:HIP_PATH/include" -D__HIP_PLATFORM_AMD__
\ No newline at end of file
diff --git a/hip_runtime-sys/build.rs b/hip_runtime-sys/build.rs
index b6d842e..9e26af5 100644
--- a/hip_runtime-sys/build.rs
+++ b/hip_runtime-sys/build.rs
@@ -6,7 +6,7 @@ fn main() -> Result<(), VarError> {
     if cfg!(windows) {
         let env = env::var("CARGO_CFG_TARGET_ENV")?;
         if env == "msvc" {
-            let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
+            let mut path = PathBuf::from(env::var("HIP_PATH")?);
             path.push("lib");
             println!("cargo:rustc-link-search=native={}", path.display());
         } else {
diff --git a/hip_runtime-sys/lib/amdhip64.def b/hip_runtime-sys/lib/amdhip64.def
deleted file mode 100644
index 8dda3d7..0000000
--- a/hip_runtime-sys/lib/amdhip64.def
+++ /dev/null
@@ -1,561 +0,0 @@
-;
-; Definition file of amdhip64.dll
-; Automatic generated by gendef
-; written by Kai Tietz 2008
-;
-LIBRARY "amdhip64.dll"
-EXPORTS
-hipCreateSurfaceObject
-hipDestroySurfaceObject
-hipExtModuleLaunchKernel
-hipHccModuleLaunchKernel
-hipModuleLaunchKernelExt
-hipTexRefGetArray
-hipTexRefGetBorderColor
-hipTexRefGetMipmappedArray
-AMD_CPU_AFFINITY DATA
-AMD_DIRECT_DISPATCH DATA
-AMD_GPU_FORCE_SINGLE_FP_DENORM DATA
-AMD_LOG_LEVEL DATA
-AMD_LOG_LEVEL_FILE DATA
-AMD_LOG_MASK DATA
-AMD_OCL_BUILD_OPTIONS DATA
-AMD_OCL_BUILD_OPTIONS_APPEND DATA
-AMD_OCL_LINK_OPTIONS DATA
-AMD_OCL_LINK_OPTIONS_APPEND DATA
-AMD_OCL_SC_LIB DATA
-AMD_OCL_WAIT_COMMAND DATA
-AMD_OPT_FLUSH DATA
-AMD_SERIALIZE_COPY DATA
-AMD_SERIALIZE_KERNEL DATA
-AMD_THREAD_TRACE_ENABLE DATA
-CL_KHR_FP64 DATA
-CQ_THREAD_STACK_SIZE DATA
-CUDA_VISIBLE_DEVICES DATA
-DISABLE_DEFERRED_ALLOC DATA
-GPU_ADD_HBCC_SIZE DATA
-GPU_ANALYZE_HANG DATA
-GPU_BLIT_ENGINE_TYPE DATA
-GPU_CP_DMA_COPY_SIZE DATA
-GPU_DEVICE_ORDINAL DATA
-GPU_DUMP_BLIT_KERNELS DATA
-GPU_DUMP_CODE_OBJECT DATA
-GPU_ENABLE_COOP_GROUPS DATA
-GPU_ENABLE_HW_P2P DATA
-GPU_ENABLE_LARGE_ALLOCATION DATA
-GPU_ENABLE_LC DATA
-GPU_ENABLE_PAL DATA
-GPU_ENABLE_WAVE32_MODE DATA
-GPU_ENABLE_WGP_MODE DATA
-GPU_FLUSH_ON_EXECUTION DATA
-GPU_FORCE_64BIT_PTR DATA
-GPU_FORCE_BLIT_COPY_SIZE DATA
-GPU_FORCE_OCL20_32BIT DATA
-GPU_FORCE_QUEUE_PROFILING DATA
-GPU_IFH_MODE DATA
-GPU_IMAGE_BUFFER_WAR DATA
-GPU_IMAGE_DMA DATA
-GPU_MAX_COMMAND_BUFFERS DATA
-GPU_MAX_COMMAND_QUEUES DATA
-GPU_MAX_HEAP_SIZE DATA
-GPU_MAX_HW_QUEUES DATA
-GPU_MAX_REMOTE_MEM_SIZE DATA
-GPU_MAX_SUBALLOC_SIZE DATA
-GPU_MAX_USWC_ALLOC_SIZE DATA
-GPU_MAX_WORKGROUP_SIZE DATA
-GPU_MAX_WORKGROUP_SIZE_2D_X DATA
-GPU_MAX_WORKGROUP_SIZE_2D_Y DATA
-GPU_MAX_WORKGROUP_SIZE_3D_X DATA
-GPU_MAX_WORKGROUP_SIZE_3D_Y DATA
-GPU_MAX_WORKGROUP_SIZE_3D_Z DATA
-GPU_MIPMAP DATA
-GPU_NUM_COMPUTE_RINGS DATA
-GPU_NUM_MEM_DEPENDENCY DATA
-GPU_PINNED_MIN_XFER_SIZE DATA
-GPU_PINNED_XFER_SIZE DATA
-GPU_PRINT_CHILD_KERNEL DATA
-GPU_RAW_TIMESTAMP DATA
-GPU_RESOURCE_CACHE_SIZE DATA
-GPU_SELECT_COMPUTE_RINGS_ID DATA
-GPU_SINGLE_ALLOC_PERCENT DATA
-GPU_STAGING_BUFFER_SIZE DATA
-GPU_STREAMOPS_CP_WAIT DATA
-GPU_USE_DEVICE_QUEUE DATA
-GPU_USE_SINGLE_SCRATCH DATA
-GPU_USE_SYNC_OBJECTS DATA
-GPU_WAVES_PER_SIMD DATA
-GPU_WAVE_LIMIT_ENABLE DATA
-GPU_WORKLOAD_SPLIT DATA
-GPU_XFER_BUFFER_SIZE DATA
-HIPRTC_USE_RUNTIME_UNBUNDLER DATA
-HIP_FORCE_DEV_KERNARG DATA
-HIP_HIDDEN_FREE_MEM DATA
-HIP_HOST_COHERENT DATA
-HIP_INITIAL_DM_SIZE DATA
-HIP_MEM_POOL_SUPPORT DATA
-HIP_MEM_POOL_USE_VM DATA
-HIP_USE_RUNTIME_UNBUNDLER DATA
-HIP_VISIBLE_DEVICES DATA
-HSA_ENABLE_COARSE_GRAIN_SVM DATA
-HSA_KERNARG_POOL_SIZE DATA
-HSA_LOCAL_MEMORY_ENABLE DATA
-OCL_CODE_CACHE_ENABLE DATA
-OCL_CODE_CACHE_RESET DATA
-OCL_SET_SVM_SIZE DATA
-OCL_STUB_PROGRAMS DATA
-OPENCL_VERSION DATA
-PAL_ALWAYS_RESIDENT DATA
-PAL_DISABLE_SDMA DATA
-PAL_EMBED_KERNEL_MD DATA
-PAL_FORCE_ASIC_REVISION DATA
-PAL_MALL_POLICY DATA
-PAL_PREPINNED_MEMORY_SIZE DATA
-PAL_RGP_DISP_COUNT DATA
-REMOTE_ALLOC DATA
-ROC_ACTIVE_WAIT_TIMEOUT DATA
-ROC_AQL_QUEUE_SIZE DATA
-ROC_CPU_WAIT_FOR_SIGNAL DATA
-ROC_ENABLE_LARGE_BAR DATA
-ROC_ENABLE_PRE_VEGA DATA
-ROC_GLOBAL_CU_MASK DATA
-ROC_HMM_FLAGS DATA
-ROC_P2P_SDMA_SIZE DATA
-ROC_SIGNAL_POOL_SIZE DATA
-ROC_SKIP_COPY_SYNC DATA
-ROC_SKIP_KERNEL_ARG_COPY DATA
-ROC_SYSTEM_SCOPE_SIGNAL DATA
-ROC_USE_FGS_KERNARG DATA
-__gnu_f2h_ieee
-__gnu_h2f_ieee
-__hipPopCallConfiguration
-__hipPushCallConfiguration
-__hipRegisterFatBinary
-__hipRegisterFunction
-__hipRegisterManagedVar
-__hipRegisterSurface
-__hipRegisterTexture
-__hipRegisterVar
-__hipUnregisterFatBinary
-amd_dbgapi_get_build_id
-amd_dbgapi_get_build_name
-amd_dbgapi_get_git_hash
-hipApiName
-hipArray3DCreate
-hipArray3DGetDescriptor
-hipArrayCreate
-hipArrayDestroy
-hipArrayGetDescriptor
-hipArrayGetInfo
-hipBindTexture
-hipBindTexture2D
-hipBindTextureToArray
-hipBindTextureToMipmappedArray
-hipChooseDevice
-hipConfigureCall
-hipCreateChannelDesc
-hipCreateTextureObject
-hipCtxCreate
-hipCtxDestroy
-hipCtxDisablePeerAccess
-hipCtxEnablePeerAccess
-hipCtxGetApiVersion
-hipCtxGetCacheConfig
-hipCtxGetCurrent
-hipCtxGetDevice
-hipCtxGetFlags
-hipCtxGetSharedMemConfig
-hipCtxPopCurrent
-hipCtxPushCurrent
-hipCtxSetCacheConfig
-hipCtxSetCurrent
-hipCtxSetSharedMemConfig
-hipCtxSynchronize
-hipDestroyExternalMemory
-hipDestroyExternalSemaphore
-hipDestroyTextureObject
-hipDeviceCanAccessPeer
-hipDeviceComputeCapability
-hipDeviceDisablePeerAccess
-hipDeviceEnablePeerAccess
-hipDeviceGet
-hipDeviceGetAttribute
-hipDeviceGetByPCIBusId
-hipDeviceGetCacheConfig
-hipDeviceGetDefaultMemPool
-hipDeviceGetGraphMemAttribute
-hipDeviceGetLimit
-hipDeviceGetMemPool
-hipDeviceGetName
-hipDeviceGetP2PAttribute
-hipDeviceGetPCIBusId
-hipDeviceGetSharedMemConfig
-hipDeviceGetStreamPriorityRange
-hipDeviceGetUuid
-hipDeviceGraphMemTrim
-hipDevicePrimaryCtxGetState
-hipDevicePrimaryCtxRelease
-hipDevicePrimaryCtxReset
-hipDevicePrimaryCtxRetain
-hipDevicePrimaryCtxSetFlags
-hipDeviceReset
-hipDeviceSetCacheConfig
-hipDeviceSetGraphMemAttribute
-hipDeviceSetLimit
-hipDeviceSetMemPool
-hipDeviceSetSharedMemConfig
-hipDeviceSynchronize
-hipDeviceTotalMem
-hipDriverGetVersion
-hipDrvGetErrorName
-hipDrvGetErrorString
-hipDrvMemcpy2DUnaligned
-hipDrvMemcpy3D
-hipDrvMemcpy3DAsync
-hipDrvPointerGetAttributes
-hipEventCreate
-hipEventCreateWithFlags
-hipEventDestroy
-hipEventElapsedTime
-hipEventQuery
-hipEventRecord
-hipEventRecord_spt
-hipEventSynchronize
-hipExtGetLinkTypeAndHopCount
-hipExtLaunchKernel
-hipExtLaunchMultiKernelMultiDevice
-hipExtMallocWithFlags
-hipExtStreamCreateWithCUMask
-hipExtStreamGetCUMask
-hipExternalMemoryGetMappedBuffer
-hipFree
-hipFreeArray
-hipFreeAsync
-hipFreeHost
-hipFreeMipmappedArray
-hipFuncGetAttribute
-hipFuncGetAttributes
-hipFuncSetAttribute
-hipFuncSetCacheConfig
-hipFuncSetSharedMemConfig
-hipGLGetDevices
-hipGetChannelDesc
-hipGetCmdName
-hipGetDevice
-hipGetDeviceCount
-hipGetDeviceFlags
-hipGetDeviceProperties
-hipGetErrorName
-hipGetErrorString
-hipGetLastError
-hipGetMipmappedArrayLevel
-hipGetStreamDeviceId
-hipGetSymbolAddress
-hipGetSymbolSize
-hipGetTextureAlignmentOffset
-hipGetTextureObjectResourceDesc
-hipGetTextureObjectResourceViewDesc
-hipGetTextureObjectTextureDesc
-hipGetTextureReference
-hipGraphAddChildGraphNode
-hipGraphAddDependencies
-hipGraphAddEmptyNode
-hipGraphAddEventRecordNode
-hipGraphAddEventWaitNode
-hipGraphAddHostNode
-hipGraphAddKernelNode
-hipGraphAddMemAllocNode
-hipGraphAddMemFreeNode
-hipGraphAddMemcpyNode
-hipGraphAddMemcpyNode1D
-hipGraphAddMemcpyNodeFromSymbol
-hipGraphAddMemcpyNodeToSymbol
-hipGraphAddMemsetNode
-hipGraphChildGraphNodeGetGraph
-hipGraphClone
-hipGraphCreate
-hipGraphDebugDotPrint
-hipGraphDestroy
-hipGraphDestroyNode
-hipGraphEventRecordNodeGetEvent
-hipGraphEventRecordNodeSetEvent
-hipGraphEventWaitNodeGetEvent
-hipGraphEventWaitNodeSetEvent
-hipGraphExecChildGraphNodeSetParams
-hipGraphExecDestroy
-hipGraphExecEventRecordNodeSetEvent
-hipGraphExecEventWaitNodeSetEvent
-hipGraphExecHostNodeSetParams
-hipGraphExecKernelNodeSetParams
-hipGraphExecMemcpyNodeSetParams
-hipGraphExecMemcpyNodeSetParams1D
-hipGraphExecMemcpyNodeSetParamsFromSymbol
-hipGraphExecMemcpyNodeSetParamsToSymbol
-hipGraphExecMemsetNodeSetParams
-hipGraphExecUpdate
-hipGraphGetEdges
-hipGraphGetNodes
-hipGraphGetRootNodes
-hipGraphHostNodeGetParams
-hipGraphHostNodeSetParams
-hipGraphInstantiate
-hipGraphInstantiateWithFlags
-hipGraphKernelNodeCopyAttributes
-hipGraphKernelNodeGetAttribute
-hipGraphKernelNodeGetParams
-hipGraphKernelNodeSetAttribute
-hipGraphKernelNodeSetParams
-hipGraphLaunch
-hipGraphLaunch_spt
-hipGraphMemAllocNodeGetParams
-hipGraphMemFreeNodeGetParams
-hipGraphMemcpyNodeGetParams
-hipGraphMemcpyNodeSetParams
-hipGraphMemcpyNodeSetParams1D
-hipGraphMemcpyNodeSetParamsFromSymbol
-hipGraphMemcpyNodeSetParamsToSymbol
-hipGraphMemsetNodeGetParams
-hipGraphMemsetNodeSetParams
-hipGraphNodeFindInClone
-hipGraphNodeGetDependencies
-hipGraphNodeGetDependentNodes
-hipGraphNodeGetEnabled
-hipGraphNodeGetType
-hipGraphNodeSetEnabled
-hipGraphReleaseUserObject
-hipGraphRemoveDependencies
-hipGraphRetainUserObject
-hipGraphUpload
-hipGraphicsGLRegisterBuffer
-hipGraphicsGLRegisterImage
-hipGraphicsMapResources
-hipGraphicsResourceGetMappedPointer
-hipGraphicsSubResourceGetMappedArray
-hipGraphicsUnmapResources
-hipGraphicsUnregisterResource
-hipHostAlloc
-hipHostFree
-hipHostGetDevicePointer
-hipHostGetFlags
-hipHostMalloc
-hipHostRegister
-hipHostUnregister
-hipImportExternalMemory
-hipImportExternalSemaphore
-hipInit
-hipIpcCloseMemHandle
-hipIpcGetEventHandle
-hipIpcGetMemHandle
-hipIpcOpenEventHandle
-hipIpcOpenMemHandle
-hipKernelNameRef
-hipLaunchByPtr
-hipLaunchCooperativeKernel
-hipLaunchCooperativeKernelMultiDevice
-hipLaunchCooperativeKernel_spt
-hipLaunchHostFunc
-hipLaunchHostFunc_spt
-hipLaunchKernel
-hipLaunchKernel_spt
-hipMalloc
-hipMalloc3D
-hipMalloc3DArray
-hipMallocArray
-hipMallocAsync
-hipMallocFromPoolAsync
-hipMallocHost
-hipMallocManaged
-hipMallocMipmappedArray
-hipMallocPitch
-hipMemAddressFree
-hipMemAddressReserve
-hipMemAdvise
-hipMemAllocHost
-hipMemAllocPitch
-hipMemCreate
-hipMemExportToShareableHandle
-hipMemGetAccess
-hipMemGetAddressRange
-hipMemGetAllocationGranularity
-hipMemGetAllocationPropertiesFromHandle
-hipMemGetInfo
-hipMemImportFromShareableHandle
-hipMemMap
-hipMemMapArrayAsync
-hipMemPoolCreate
-hipMemPoolDestroy
-hipMemPoolExportPointer
-hipMemPoolExportToShareableHandle
-hipMemPoolGetAccess
-hipMemPoolGetAttribute
-hipMemPoolImportFromShareableHandle
-hipMemPoolImportPointer
-hipMemPoolSetAccess
-hipMemPoolSetAttribute
-hipMemPoolTrimTo
-hipMemPrefetchAsync
-hipMemPtrGetInfo
-hipMemRangeGetAttribute
-hipMemRangeGetAttributes
-hipMemRelease
-hipMemRetainAllocationHandle
-hipMemSetAccess
-hipMemUnmap
-hipMemcpy
-hipMemcpy2D
-hipMemcpy2DAsync
-hipMemcpy2DAsync_spt
-hipMemcpy2DFromArray
-hipMemcpy2DFromArrayAsync
-hipMemcpy2DFromArrayAsync_spt
-hipMemcpy2DFromArray_spt
-hipMemcpy2DToArray
-hipMemcpy2DToArrayAsync
-hipMemcpy2DToArrayAsync_spt
-hipMemcpy2DToArray_spt
-hipMemcpy2D_spt
-hipMemcpy3D
-hipMemcpy3DAsync
-hipMemcpy3DAsync_spt
-hipMemcpy3D_spt
-hipMemcpyAsync
-hipMemcpyAsync_spt
-hipMemcpyAtoH
-hipMemcpyDtoD
-hipMemcpyDtoDAsync
-hipMemcpyDtoH
-hipMemcpyDtoHAsync
-hipMemcpyFromArray
-hipMemcpyFromArray_spt
-hipMemcpyFromSymbol
-hipMemcpyFromSymbolAsync
-hipMemcpyFromSymbolAsync_spt
-hipMemcpyFromSymbol_spt
-hipMemcpyHtoA
-hipMemcpyHtoD
-hipMemcpyHtoDAsync
-hipMemcpyParam2D
-hipMemcpyParam2DAsync
-hipMemcpyPeer
-hipMemcpyPeerAsync
-hipMemcpyToArray
-hipMemcpyToSymbol
-hipMemcpyToSymbolAsync
-hipMemcpyToSymbolAsync_spt
-hipMemcpyToSymbol_spt
-hipMemcpyWithStream
-hipMemcpy_spt
-hipMemset
-hipMemset2D
-hipMemset2DAsync
-hipMemset2DAsync_spt
-hipMemset2D_spt
-hipMemset3D
-hipMemset3DAsync
-hipMemset3DAsync_spt
-hipMemset3D_spt
-hipMemsetAsync
-hipMemsetAsync_spt
-hipMemsetD16
-hipMemsetD16Async
-hipMemsetD32
-hipMemsetD32Async
-hipMemsetD8
-hipMemsetD8Async
-hipMemset_spt
-hipMipmappedArrayCreate
-hipMipmappedArrayDestroy
-hipMipmappedArrayGetLevel
-hipModuleGetFunction
-hipModuleGetGlobal
-hipModuleGetTexRef
-hipModuleLaunchCooperativeKernel
-hipModuleLaunchCooperativeKernelMultiDevice
-hipModuleLaunchKernel
-hipModuleLoad
-hipModuleLoadData
-hipModuleLoadDataEx
-hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
-hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
-hipModuleOccupancyMaxPotentialBlockSize
-hipModuleOccupancyMaxPotentialBlockSizeWithFlags
-hipModuleUnload
-hipOccupancyMaxActiveBlocksPerMultiprocessor
-hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
-hipOccupancyMaxPotentialBlockSize
-hipPeekAtLastError
-hipPointerGetAttribute
-hipPointerGetAttributes
-hipProfilerStart
-hipProfilerStop
-hipRegisterTracerCallback
-hipRuntimeGetVersion
-hipSetDevice
-hipSetDeviceFlags
-hipSetupArgument
-hipSignalExternalSemaphoresAsync
-hipStreamAddCallback
-hipStreamAddCallback_spt
-hipStreamAttachMemAsync
-hipStreamBeginCapture
-hipStreamBeginCapture_spt
-hipStreamCreate
-hipStreamCreateWithFlags
-hipStreamCreateWithPriority
-hipStreamDestroy
-hipStreamEndCapture
-hipStreamEndCapture_spt
-hipStreamGetCaptureInfo
-hipStreamGetCaptureInfo_spt
-hipStreamGetCaptureInfo_v2
-hipStreamGetCaptureInfo_v2_spt
-hipStreamGetDevice
-hipStreamGetFlags
-hipStreamGetFlags_spt
-hipStreamGetPriority
-hipStreamGetPriority_spt
-hipStreamIsCapturing
-hipStreamIsCapturing_spt
-hipStreamQuery
-hipStreamQuery_spt
-hipStreamSynchronize
-hipStreamSynchronize_spt
-hipStreamUpdateCaptureDependencies
-hipStreamWaitEvent
-hipStreamWaitEvent_spt
-hipStreamWaitValue32
-hipStreamWaitValue64
-hipStreamWriteValue32
-hipStreamWriteValue64
-hipTexObjectCreate
-hipTexObjectDestroy
-hipTexObjectGetResourceDesc
-hipTexObjectGetResourceViewDesc
-hipTexObjectGetTextureDesc
-hipTexRefGetAddress
-hipTexRefGetAddressMode
-hipTexRefGetFilterMode
-hipTexRefGetFlags
-hipTexRefGetFormat
-hipTexRefGetMaxAnisotropy
-hipTexRefGetMipmapFilterMode
-hipTexRefGetMipmapLevelBias
-hipTexRefGetMipmapLevelClamp
-hipTexRefSetAddress
-hipTexRefSetAddress2D
-hipTexRefSetAddressMode
-hipTexRefSetArray
-hipTexRefSetBorderColor
-hipTexRefSetFilterMode
-hipTexRefSetFlags
-hipTexRefSetFormat
-hipTexRefSetMaxAnisotropy
-hipTexRefSetMipmapFilterMode
-hipTexRefSetMipmapLevelBias
-hipTexRefSetMipmapLevelClamp
-hipTexRefSetMipmappedArray
-hipThreadExchangeStreamCaptureMode
-hipUnbindTexture
-hipUserObjectCreate
-hipUserObjectRelease
-hipUserObjectRetain
-hipWaitExternalSemaphoresAsync
diff --git a/hip_runtime-sys/lib/amdhip64.lib b/hip_runtime-sys/lib/amdhip64.lib
deleted file mode 100644
index f7c559162058a1770d32cd26368d73652243a098..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 124886
zcmeHQdwg6)*`5iA6|rKuSg>M2?jn@54Mi)m*=&-fxh%U$ODnQ$lWnt*?8eQewupj=
zctb=*L`6hYL_|bHL_|bIL_|bXL_|bH<l{r+Lq$Zy@0rUvXKrV*hu!iQ{r#Sv>^tww
zGxwP@^PZVKSC;By6Gv=+$YTB9p<RcsSkl$GWXTd;9{lV)ylcs!-0Ar33EAe)ggkUP
zArId}$Rj%wdeA(9We*a{Fz|@LDGfpyzO!E7?)MYQ@f4oGy_a&3zf2SImwO3$GzZ*D
z$X{0gmk{#U0^kfn{*M54;1NO|KL)skkiT^S7ZdXLg}_<BlZ5=^1ORb*0^vMy10nxB
z7`Tv-ClSt*rvXn8@~;U1I!_G%HxcshPT&GUo`%lTrvnJ*KM3bP_Yv~U2!Qzf7hM0n
zgiwOC5~P7VLny5R_Y&GM0^CgKHb($g61wd{z{P~VU@`DXLbrpT?M?<BBlLyiz}<w-
z8vt$~^hHYm=xz_)?aw9j#XA6}0*@2A!v^3kLSNDkTu119gggHNLSMQNI1@k|c0?R@
zypPbA<$)UsU9b|koY0;22hJyS=LNtSz>|dTg79{^lh9qefvX68`Ch;|gf843*bE?k
zyB!bQLg?;C0#^|FihY6e2weo7MJE9d68g&30Mfh13IJTM+8aRJ85Va)AYO|ZpwF=9
zaT0gn3GCG?fpqN!y!v2?3-JW@o-c7ao(!lf4Exk29>NpYHz#pDp1^*~BoM}a!2Sq_
z<1{=OkVb|Bj+H=K4*(84LgHFHf!84aIFP2-00*5c0lx<UuU#i`6Q01qD<!VL6F3BU
z%Ypno1XzN6;h4sg;U0lQ^MrETfG5zoMB*Ymfu%c2oPj6969UU7(awP7NCyY<bUDz~
zD}lV|0uJkvxExPl#X<?xpB2F2L}C-3437%D?gT<P?#2^%{eT4W=Jmiz#FGPYTnW5k
zk;EtQ1df;|aWb9^h&RI<s}c|32^@*|bKHq1km{DW22Y?H<;n4RJc0DC5@+KH^mIs|
zUi2^^Ooq&e1j;4@^sbb+1W%xEcZrYS$?%Loe}hnt`|$*_2!{h@lLd}CNaB1vfmQP*
z-is&0lL7;q(f<L1>m}~N6Bx=#+=eGGjP!9_fG3cfCxNu&7#<Wj`WQkvP*;xz@+&2-
zz!NC!FL5rOz{qwI2xEi++zg{AM~>_81XeGR0QYKO4bsDLGM)^77dU1Ep&WPN39Rjv
zK%T4x)*T_i{s!K(QUY;%6L2iz#{rIGfj93cfjoON!!rWMO%uw2GCU3_u9HAs6@lZs
zC9c5}D0N9(i6^jrUx|zG1jdjaj+5|YcvN8gSVB1v<~UGBzHlIK%fJStkK_Az0vmfI
zuErCXSRsKpOaK*>BM0J80Zv#VfwZ3hoCtl6>+u98kv|-l;R#HkK5`&0rWl?QsMZPP
zco<KhhIn(_i6=0P`pI!Sp1@m<lDG*^pnjwTIO@R6B8l_x1R4t@&cqX#-A>{(Jb_L4
z&4D;=0^ZsoaWb9^j|*&`B$VSJJb^8UCkOI&3vg1m#I<+=Z|jt}1W(}Poh8n~6L>p#
zIl%FD2GkFRcbqEmB%Z)2$P<QBkq;cm+f#veqC7Y*!V@@cz68QLjo}f2cdaLs<8C~G
z(~(~sSK<l0dvA$z@C447Cvg&<3`i@(d%(|eE1tl6a}w-t;C(A4P#4|@ynhdgv+-m=
zS{Ocn_;B2gCvYbGaUhS*1U|S(;=On>JRxw_B%vI4;R$@GU*cLkfwPegj*s98e3(d_
zfG5L)0v|y-IBv!h_$cZ<$1QjQ=X6P2g(vVa#D@cA_A!R11<pl$IPS+2_&CzV@k=~`
z^AJA{gn1tD3FJEm^86FPC-;>&2T$Nrs1F<sJQ*GqIRAJ;IgrQa1E21dxC&3;0^~Et
z8F(^0Ch(c#2<1Q?e+Ia)TLO7?A@EtGljAZxfs2q%4&>cMz~^?5I2BKZM+H8A451u1
z;R$?Ug~Y{p0v9ikI1Nt*_+$9uItk>(7lBK_&jCM|0AJcw;{A9sAfFj7txG(DC-7zD
zGskUs0++o`;!-?;uPl@}6HkUG1umZ;l;ci3fv@&TATPcOT+u0kxLg5z4dHN{jwb`S
z8LnI{aU-6<*SjP>k0)@|e2J6sWI);(zHzJs>fASgtC4RU7vTwf6ZysgKi^~k7sEB+
z;{ew+z_*r2AdcSxu3aQ?4xYfbx0Bd}C&PmR*Nqd(fw)`;eCIBK>w5_0KwY{X`0l<E
zSK|rXkdwF*PvCn9hvN=Bf$t+-9FO4%{9sz*K0JXNM<i~+6Zj$0&4IZ85V#5Tm*Z?a
z8J-gO(FufdAf7)0ZeA^cyt^6r@evY;^N)dB_LsO2Pv9r>B~Hha;TeHjCkf@a7f;})
zh#v>y_EX?C<Oj!fcmh8|d^s+`6S#e$#F=<9AY6u@HzbhXKL_qWzHlI}cL2XYI2@Pa
z3H)-g1mgNj;8#0JAg;e+ctYUL2|_vU!4vp(PT~eUfxA#|IWEH!_|1V5s8hcI?%rA8
zx6_1jJb))~57NzXBc8zTR!UrrCvfi)f#2^%D91T?0{6|6cn6*gs7nlg7?(g<{Q<bY
zU*Z}(fj=TV4y65$zyo_rT!1I=r(Gq^#S?gNTZv70GCU;k=XHesd0nxX9n1~qM~Xd}
zLOP$#jST0DrK$0W%5>K<QY;RQ4h+~D<di@~D2()v4y`Koq()NkQkj}A!h5=2E;Y)9
z+4_dkSb2E;3FWZ{i4li>ddf46dTop4$te$gWi~ekYvZ$%<$=;{b!=i)xn3<#GA_46
z=&yfl%<a$Z5c;z{fr#<B9YTL2<<0r>hQ4wmRj-$}Fb{^r*6gm;$IJC}ZL(I^^i{d-
zYp^msRhphIkK06slkl4w>?x*mqs3HjZ+0j<vQ`!%6Zd5EnKUX#wvbDWr27?@K6vOI
z&ZjfQLUyQcAXDtkp$-iV=LZ#~f#JU5KxTDjz$Gj8W(P8c!(ghgN)ZjG2a4UJ*@2$o
zaBd_!JXG+x6;rufW~j$d8OWk)x_zroZqJ2uaUk1mgt#V^9Vw=V2M1F+S#G4*J1|;M
zGpUftXHx^&bs6aA)O?K`R%P--TK<gm=QF9E;z&M)yv+=yy2DIDW~}PZBcplKWbG_{
zbWy4oMpEfjD2jDKj;2R@QpMHTLKgl}FsrlaOptphO%^Joy_tMI(^E_h3=F4bDp|Rv
zdU}fe-RZO%9+wZL2G$}m{i&foMZJ3f8Il?5%MN9VBWrU(4498JD#c)mr``-q#Hl!(
z@5v%qCAgl^!CaAtDe@+WHVcotrH6Ym#o_LwP%{;8BC9|(oGbR_heva&Kk3-NrkLx@
z*`5Yc`M!+Eh7_x34h=PbWN0}p+ETbCwK}u3vp9%2+M%rJ%b6~`$`pr(ikV|F=~07R
zG_bB^-N@kFNL~q0xbR$JRm;p=;f&_fxum$X6W%SaM~`MkGsRqf7_ATuSaHwx_M4H+
z4yKSl-J`vzFvT?~H6p?Zg{*`ym^vnCAkvds7OE?ch3VLzN#)E0F;6Hxr+z*&I6RUm
z4rT^jz6zt=ynegfMhk1wqS#%oYlicycuktyvDn#DJjTcY-8<6=dH`0FS%M6WrE`V>
zjk&{Er^X|j8%(JcerR-%6-{mw8J0)6t9j4dSSI#lSU;K>N~>JC?9dRpioxtq@fcPM
zl&*#)M>;#5z=IC6pLOF}Z^!)QQ)`MN*})9@uR%4Q`AlJWl#MRwRJxyyF=5BXd=)YS
zY*^9~Qp^TjKi3e0k|_CH1|bcpZ2)xWuKI#1Z3aJrE_iS_S77}Y>+Y2t9W4aisi=&G
zlN<~&mr}+{79r-Zuy!adJ1#ZfS(i~LB9chvpqdEgfB|TbcdyDoz`|p7XJ8luMIndo
zSBV8L0@+lRboFO*`H?h_Mt*dNWqEOQsC#s%XCM<aXQ;>_9epz|Ds+o7WaieN?Ln<5
z_U1FZv8gV@1y&mUnS7MZ%m<rJU;xAzpq3zWVf8FGJUmb=jON(<Qwc0|Vp+C2m`*TH
zOyG;Wd${W_q~th?D5mlVqc5LAH7-P9Lxa^!Bv(c%s&33TMn@z9@fyyr)rVv@LE*!Z
zsHS2JkZCd8P@}QbEM{EH3BOnr)QsWck<o6}EhCPDK_wYRdxVeGnS3D{oG|*J>spgq
zTPPyvEJ`K_ow)>Kd?D(YnK(1pjYce5VKJ<0%teeCsX{i5;8;e6qabq~L_<Q>3}nMT
zfk|`uOwcWeyiw{KbI)VAV^c?D5vrU_yX>viIY)jNy>NOYi`vd7SFCmpkA@zY>(K+E
zxvAO3ys>#J_k;MutuLBNFjqMyi{ZUH9JdrFBmw!_7mQxarEg#uBStaJ`si>r${hL!
z2QkA-g?%QIVLDM{d2Gf@M8&F2%u8Vvs<Ifg(H{i45;(}RVO3F?Gje}x3nOfZgP3)Q
zMp9YZ=<O>6^%%i!tj-oUbWRj2<#JiFSVE>V(Ye9aQG|F=$<?OQrOC;3t-7JIakgG+
zRBF{Q$ar_=GCMQj_Zp)BzxndU%1onN?=3aDE7ekci%RKoRQ=#p(2Qm1C2~`I43?^;
zjpgyxrMinob5(rFcU6~{$Vt)1D_>)_9+ijFNfE|GM~I;}TCMwXDc%`x@Wt`<8%xub
z;>L2LxPG=WIbN)clQ_BPxSX!l(p1^)kGnX1Zmcwl6Q!96(_bhTzL3JybY+MUD1Sz7
z>`i)Uk63Hq8k)e=!@D<@62|J4>4tcr<}b)gdvv+(%LrTk)OF<gL>PhKx2~nLu;j4F
z>Qc)`wz{Dfh6`Q(JJdq67!PU`0#tvxbUJ%jP>L1=F1?XjxN@K<2U52WV`V|-8L1i0
zVHoL&T5YD>Q{Ge=<3+6fj<jUQQ)XQWOGD5T2|=XC7oE}*r7D(ySW$CbOK9lJ95ffH
zbaCXTu{o*}>F|f@9+W?eXk&BmOVg;#l-5s{bLDb9H8xhBnc)$It|eD}WvWh3mRR+J
zI)fIpx?G=OeYS{^E;F>#rLl>!$lpj^6`K09^?JD~7WW#P5p-G#3_YccxtjcKX!TB(
zHU^o2u*6S8r!Y~fm&XUoQ)Vnp2h|@N!nBww0!@#NN4laSQ8f!ls+CuU(A3ugX1b2N
zg4j%mC^m<K7XB8je%5Q%%1NRnH8vL%NvJ8SBi1ajP+FQqHr6n*g2C+&=dV!48skI_
zLz(S0ax;DFZ7TAq%N1X&Ur3j#qLHxXBIs<f>8Ca|J==iS>C*bjWThcf$o*MPhF`l8
z@`y6Owv*x4YKx&)%f;}C+8(8i{jl9rjYhq)9?8-aBdP79drK~z?Vg><j%!{NvF*pG
z)S{iTcCA!)+gDF{Luq!hf$Eg2)p#ErdNiDDzkPL7CHRa|66P9sZC5*lfyz`x&psis
zHC+)5WVUuN^9a+z1-2eu1Kg3<931{!r4tsJt?%d<!l;>U4&Mz7g;TkDrB+9a$d{@c
z^_o<0xZ9zO&SILV77!Phda_7I>Xj))S(O?-5UnZ9@iAbc%nJ={9!35fZn3`e<;iks
z#_2h5^7x%mTVX_-FEa;^zecITM^{lAsz>e;>l1x?IOLgrLpQ^>h)h@l&bYy^+FnCt
zLvDl&SG%>Ter;Df1fwk?OlMn!JZN_+Lm;!mHS4$P%XBk+D+6KVO%WRUBeez}8KW8)
zN+QjAWmCD1x~j~rdQ6$3y$PzBdc9T;`;@vaa_OQ)R;n9$G$L&&RP~W-W7AtYdqy#$
zS2kA5qJh>o*>c<0QvO!Z6DZ@SOD8ph?2-`?p)TzaGF3ZQt5h+sKvKi*kk952y3;B2
zo7sdpWHhf<f4X$mR2mauyHS)Qx$Y~NgQM`dcJN!*%1oA~XUgLv74f>sHC36eee`Tu
zOq{r$_@!&)%VXHUWtqlR<WHAQab}tiUy2?V>ALECE(lbWMcSJiD1w1X^~8}a)8$lk
zydSgQ+N_wRW9Ap*sQPzorfVWwcM!vWMeM%wKSg)OQ@^<({KlxLVGZI9uQnEyBTFYX
z_zkJ*LyTIY?l6Dp(ZSNpiIO+Q(e{rlad?cRst;x24240y*WNw5VS|{R>N9IoAq=Ot
zj%7Kdjz2YN`0~x1#1Gv)Xo}pO|Ipm~YcsquF=zg_=4!l&%NQ8a$OqQSkc6-`^B9}5
zSZ&_+dkyuOsDEKj7zeD-HJKJVT#Ru|hM`JLALi)5(kz%wm(XY508%Y#Fi(W`JJLdI
z^u-4FQYD(^)OgVKGcEZ`)reY5NFL=?C<+_0A+bXWd&p4Jc%KgCxq5B7TyIp$BC$cz
z6|ow+HjE*R=(nX64Pb$uIeI}pP?`~8u-c{lR>RY)=s<asu=0Vv%SH7mIsuWp;!Oyi
z456mKWomtGGBrM4mn$<&U6a|`1^L<?s&av<C*N^XY=EA^Xg9oJ!;F}1!cUB&!@pRj
z!lbV@TgMBjT;z&^aJbJI%IZq_t#gGG>coa7(-0Ss>g)&uwozp)B#C9fC^?Zh^26RD
zP&(-etXcCJ=}>K)_oe)Y?PB@tDNmQH;~)zvybMv3TfQ<=(~T`ARV1|a*_=iU`j#)l
z$?bPdsnT%z3>_?gtiLzCgfdH8jMk>UBDH*A>X%{zwCP8ayL_?1$l)mzyZm5E>+ljt
zJw6WQQ`(?5i0LW|3389G-g<3HG;`h$$vP4FR@^<IjMRK#gs!d-&>c9^8%T8@dgVuZ
z;i(NH9^-6;F<Gld?Iz8C85(@;JPK9()OC8w>t{Fi)Eao{6%$Mz6J2Whm|=FFVX)S0
zT!z54v>cI|T&9Pq%$%7kGK5P)jyQL3D41<{LkZm-p#*Kz9TUsd6GF(sMn_gFd7Od}
zLRTY%&E+w@5<oFms+Xq35YNN$=-8nJr4+^jz5|)9ZRB?tOVxLrPE2T4imk9L4LhWu
zGj!*M?P`Y*b&u{aT<&%#QMcj_#pT{qC`#X#K?ym+v3kK6J*B@>bJ`K2qS;g&!(MQX
za6>(3s57FwjLkpG-411RdR#6viiU*@lOkQ0HRAD&a(2v`PAIVSSnm@xG3W__rI)YO
zq*IUy(2az;eo!x=Vzgn~#qHD1rNC$8qWjENXBwqyqoQVzDvQ<DUfy}WD&8VT-G9_-
z(zWR=;TvI;ld%eM;UKTVV8US_3Pp4Fgkj{j7Jzh%^NpPK#XxiRgb^EqxRMi#En%AF
zPn|&sgLNlfN{UF=MT-w7eRd6WyILUxGd*Vjwxi`=PWznREw?z|QH}+T&e5A0b3})R
z(#N`8=j2y<OixS{tvN#s`$|uEp^g>WjOeGFxof$_`F7^6^c}hBzFF_s3p18%Rm?ww
z*qd_O7vieDuc<ywH?MD_8)nRke`o(Kb;O=7U*Y8~$PhbO*@|q~G^GbFv$c_DE(S%c
z`;q%wqci1tWX!@NX1ka^r)rz9nBaIV&ci^!a5DX}EiNai;h_5)ot~_fqP&wob)CxC
zOy59Yw-+t3@-(^JJifD2Fz*X9Uh^!REWcRT4VGD=VF(^Ix#cVHf+{3dhULbGN*$|X
zg;tndZz+fC!$CI0D_P-<R;OZ8Ww<ze29}0Fgwzy8hK8YQ7y_GU@bQCrh<<49Yyb$H
z`QMr=Z;s6VLdi=OkR>;Lg)5*^+gSf(Qn9rVaLZp>I6?gjbi&`7R+IyQo+1e~vs2Tx
zdPCnLVERsZjIYSN#_||BMSiQoG9t^Kk#u?t{q0N{!&iT)IzGuWKWhn9ePlL&Vbk;t
zp`mYv4(&*!>JFphu>&`yrjIBrmJ!oqOh1x^+H%SUBT_6Tvdq>6p`Mukz&57At&G&4
zhEBT1mdjxVy{Rn1VxbCs+i5Pp#%5VeFsjyEAqKO$n4t8XMWMu60^1Xb5)w)bwm`0v
zXi~$6nMLxqQ%et3XzKBe+N;@C#c#2l9sV7%1d_1#)CGo~p=IpC39sRnxcbuwht1%a
zL6o7Ziw!?4FIhQ=VEHdYLt3LpN`b)Cv%UltVd@(uQLi9ebCt%}L=?988HJ3O0kJa@
z^x0@~s>Gp*F`-=F#QT^?IS}gln<~O~hwJelnmb#24)aI8mKZrCrBNz0ff`z5ZvZy=
zLv_z=W}P)A&CKAGnchZSA3Vglsv)4mi|l*hC;Dw_i7X9mvZSsmHGQzG;9p{38c>~`
zEHRrdUTT8I=^PtIwA06`rj||?=P)p*o)Kzbvo6x0TdYGShC2Rg<ILB1ca;ZoE16jK
zm*K*jYt-b*Q_xXlsy1t!T0Kw%re3hx$=ar{a||S=rl=drY(?tpa<{Lz`o<#GOr2wT
zQ9X;|8p%vv=AxRG%E)SB(@%UUMU3$}?$GS~5+MX~(-&TZwARH^9?4wVj#M%aq1g1p
z%oj)a2N*I>q(IZFW9Je*rdr=}noQGfFzYC#iD0%72vj}Z6KEwM^3|4meeo9;#NG0y
zSYa^d$Y@gerG;E(FLO~xpmh;>x1;JmDi@KC=(noHUqWQN7JsX*Smu##>@UMbeB6TA
z!%h9JC6K7a!&QX93WbY<X^7_8n1@Iv){#ZC1xsj!AL?5XGF;~HX+}}^Jx5$)J-ht{
zpZ<uV<GB}ztXN71D|MRO>&uLs>eCy)7>{m0rm{;*&Lw%Fs&<j0S_B4=VXg!MBXv@&
z6RJW>-&A$Q&B$Q!(+G(R^}r&LsoSXaN3AQxei<4)jT+xq7pdpJ92!Qvn3^+ga8UU8
zFFUk;J2WXWbi>*oB_R^U>FPNX>br8AIY#^XA_o+igFzY$1-5#o7iXSCA#!IseoCPJ
zSjKsE`RZ@fQc<??Uq*T$Q%auya%dQ3#nhas<og+FcO3|fFs0ZCR+E}O<Of0`v-#&j
z4h=Iv_7R?LMJfEtekaPBKwyL!uBwL}K`4pQG-42_$Hc-cbc=@jbCqlmuwEN9sS!iA
zJtr*VqGT&FLt78miouL9Z$oafzf`~Y9aaGGx8<tl5m&KuClV_WF@#pQrm8E}VZV>S
zjDDX>jJUb_{YXzIjO+KgzNzYpo5AhO6)w~Ri$q3_^c>nHiyBd2KWuk{5=u_io~4~i
zNcdrfw6xO<DUg_&E0lm%SmCTNgOLIw6g>lsywp}klovpkJPKEH4PO3OdvrR)_`|F|
zv|iOchAxp$ezG0pY;lesXu#Mkp{?IHSzBM?-{FPYQe^AGGE=UMhPqN@=*BE4^1@P%
z;Pb43s%PX`YU&zin9LYXRo5BIXw^~WG}j)bo7{*}ewkrHqo>p;DM<|^F`Ajpwk~9b
z_VC!)>~yI*wq>xiIfVk=RPM&I&WSU*a-FAex?W?Shl}G1MD&{X@@%;x8;!6OnkJ3>
zdf0Qd225isrAeOHz;xP9o<lEDs8M8`39*BZJQzHq)nKzy#8UnawVKW!Rw<fSP9A=g
zf+R$dFDIj&QD!n68evG7hn<+xjr>hfq-%db#;D{9khUJ;^TU@t9G;X_53FQvs7#jY
z_|&w{m)mtg6ndyv<13Dl8h(db!Jgnq9mk#4*)bLCOJn>aq!^dTCqB!<2WLo{@*!c~
z5{CMP;NuLIFVPR%J>2^Ym5kj_$Y-%Ex^b3$jUYo)1W}0iV4;MiM{!_4xWzaVj=}}6
zLKF`FyqKBiE7>C^o2oEgKN!$0Kl<1%of{2&u#UNB6ivae%MG?}oTgL7sR8_u9+tt}
zk0$l_&?Bh&a>X#3bqE%72SSxb4K$3DP>&ppcty@I%;q0_^CITyakv@2mAQ?GMj$Zs
zGF6nOkz0x^R$I@h$cHPR&@?1Ruxu)#WXKIJ-BssvxcPl=;xk(pZ4pj$`Sp@->WoQq
z^fg$>D@8V6)Yf_VB7J6C*W^}&xOPlD?COKyL!lj)z{w)f91MTi8BxUYYiE&SRT!_Y
z^C*5?=?OpD6%58#yNXZ`mFu~}D<~h4hb>bT8u3<~bOzJG@F%R2Wiv4Cv_!PyRgAEh
zl{`1Lh&qHw6sN1lHq>|J33FUsnw%|*_dyXmRp{X0`Z(7(%)?MWJnT4f!cJ$|CQzj&
zL+GYasM62}-h71doa~N4Yl>{$pm$W21DUOj_7hl1L5bU3gcPQm*LV0$p319)5V|`<
z0Vf;gTv1S+RKIvlMDTLApA{Aqby=)7=FQvy!1Tf8wy)mGB<35ICq?Y=6C55Dau~f}
zB(K${N_@^^itJF?n_0CoQ^O}qV*e@hqPsYJ@>KyP%4&v*H-E)3&Ocu(=&qFboHpX-
zVMlIW-|5NH6ra4hybDLeKjwy3DJ!yAZLz8+Xd-=eKhAX1i#YP*su#>vKRiE_*fCNM
z$qzFg`l{Uar4)$jNfA5z==Ba9&U(k>M!kzfcBp#oQX`|gIDE#|JMhg_??RQRZfV^9
zdZ+s~>Yd9mF822I1)=ZIDe^l}FPFwMo5v>DnMPv3kz-|8z2;B}!y2uw*H2Z{1(9B8
zWgQI^krA|%Ikc6n_DDStI`#PpLMQ`Ooy$UPJ{-rtpZT1h#qFLemM)jKd%m`Vkjoz;
z<erBK*%|k3&O3;b2bWRu$N(jcQz%*g9bAEVHziL2m)=Xs^j`?M7sx$I$gRMNzY=l@
zu;4L5&H&iuops<5;F!k=xdrI@8zC113;#~YS-_LP3I6~mki#9FHvk9!laLF61-Os%
zG~fwf;$P4K2A(1Wcj1uEe-m;6u;6JzP6wU@PWTTY_W>i%AU;6Xe-R%5SG@#Rcakcl
z<X&K;gOZzpBetRBO5mVvDY+O}`~pfo3GBQbB_{)q0pl;E<ZfVK9wj#bOI`$BVDa{p
zoD1ymVoFX09tSq;K*?P||4S&j4mfB&B^Lk-UrNcDz%xK~M@sGk@-L(0MquRvN-hWX
z--(j*fdxBLat81uuwfTU?gYAbrQ|ALua{GD4zT?~N;U(J0mtt~$t}Q<yHj!nu<t7<
zIS<%=5hW)94+5)SiSz<1_5c^K_p1<hpkp!O13U;Ew<jfc0KI!5?!dvXrsP6k{@#?F
z4m<_a_o3t=Ah$0i*8|J;gAOo%e@adR9s`a&0Pz8iIFOQSf&E_tF5qO;r^kSGuchQB
zVCBJxFR<4kNE6Vp1a%6y2go0ayatwZQgRWn<5J`a@B}c4I(h7J#1-i6LOg)3!;mgu
z;R@t4Kn|y56YwZ-!s}3ez`*MfA7IH!gb6Hq1IikhcLd@FJONbSNXY}hu}31`f$kI~
z*8t19Dfv9GYnqa?fsP*J1@Hhcl0kZamA#Z)0_@&L$wz=^fJQ$h_X8tYqz5?YD3mcU
ze-$O~1)c;p51?%gBHci4h?3iYWy7d9z`PuIfCqtNjz+!$EAz-VVE+R071(YBI>0@^
zz$nTNSh5=af%$8YHsJ5ThGS4Sf!?)<3vk3bJb{&OLU_Rb$HEV=<C_sT;2B{0IHVC+
zS46yk?&FakKvxOv2H1B!@)DRohPnnk3LHBQU0|S$_yWBfDEU6nvyqakffW-716Wo;
z7{HPfzz?iA5oHOioTTJ3VD~A+19%FkS1EZIIJSmz0Y;`Nxg9v_Ey#P|$U6K0i)K*w
zfdviJ17N#Z)JtHyO$ZC<cq{S)cpRAAjJgaQy9MzDx=%v=06O1>as+lhnUb@B?cNSQ
zz%#(9??9OW51)cKoQn7Y%if7R0Op?tJ>U^w{ku@-fFn;w8iBpvjXDm@I|KOyJOqrq
zhmu=?+<Wl^R=yAU0POL8)G^>mVEqS>PGIGkh!e2rgGdMP1Tc9P$`k1S5aJE&eKzVH
zKt7B*4?GBrd<5+t==mth0_Zvi{Rpt=V@MnDG%$HC>L{@O<COdo=syp-z>%Lo+<|>R
ziMRvXeG2s%co;bTe8d^({WQV@_P7A;6L<_b?lXup(0w7|1$2EDWdkg_2=N1U_#E;P
zcoaD1^C&-H#TU@00t+ric>#|C>%NG109}{B4cPTd=*NMlf%>J$XJGx8QFg%VF2k4v
zEc^<}9e5I$xEyr_=>00<0d!u0aDathLw^Q50<69geK64Vb+m6_{#7VL;8Eb%Zy@i0
z6<4Etfd$`$8}JCQ`WpBHmV66g0*kIi9s%2Z8~qOOATWL%!U6934)Xtc<P)&(chLrb
z+zp5uF#bLCW5BWBN8SO`Kfu@kjNFK{11o-r@PR#ULY)Af0#5i5!UR^|jQ9aZ{1{;a
z``?1T7?}SPqz`xon7kF^9We4!ga`E8hW-Iq@iWvXVBzh^FW?!V@pHr*IQ9;N3#|AB
z@)TJ7ON0yT_$!19JONDHiT(!2{Th7;(0Lci7&!1Z$SYvyyOEaPqVE7k?m=DwD}RTy
z1555jpZ|O07clQWO5OoH3XJ~&=>q!iNB;mU`6Kv&y&pio1nl}J^mo9v4<e6&hk$i|
zrsU7t5Q3IXP9|?B?;xj;Q^`BYY2;nxbn<R;2HAnUgv=)|B|DOrkp*NY%m#KLyONia
zg=9CfJ9!0JL|#euAg>~e$)02{@@ld-*@x^)_9Od~1IU5oHRK@jT5>Qsge)P4l1{Re
zEF;TF7decqAcvFJk=K)z<PGEq@<wtbNs(@nCOsrWdPyJYCs}e7Sw#lOAQ>XVBu9=W
zc~T%FWR$EXYsfKVEm=q2M2;nICdZK?Ii8frdNM}FNttXQI3<u&$O+^`GD)ULmDI>I
zc?+qN8PXuLWD|KS*-W;OlgQg*QvDwCUh+Q7dOkqTBp)Pakq?ox$%n~D$VbUJ<YVMq
z@^P{)IgfmTe3E>MoKHSYE+C&F7n0ACi^%85=gAky#pH|R67nT-DR}|;GP#U=g<MX)
zO0FPZBUh5IldH%#$kpVV<QnoVaxM8bxsH5?Tu;7BZXn+y-zPsHH<BNco5+vI&E&`A
z7V;BvEBPt8jr@$<PJT}AAip5LB)=kel3$a%$ZyEq<hSG=@;h=b*^d04+(-UE?k9gF
z50F2R2g#qwL*#$R!{mR-BjhjSQSw*v7$%jElfRL_lYfvW$Un)G<X_||@^A7q`46sU
z$7GLE+CjIW+tL@%?dS{XJo+NKJ$*6Vfxd*!r!S>D(wET%bSJtq-G%N-Urra&-RSQ0
z6?74OCEbI*iY}&m(!J=b>E3i7x-Z?2?oSV(2h!KjgXnAN!SoQigdR#e=~B9kE~j1e
zFuH;sPG3h~Pgl}6&?D#@>5()=yJ?#C&<yRReYBru=~48BWECBtgLH@v(;PjT=4pYB
z&{4XYuA#@!wR9bQ6Fru`nI1=r^mtmL>**LBr)9c<Zln{mLQkM4(n&f+tF%U^>04->
z&d>&(rJLwm>1Mixo<!e9Po{6D@1Up9Q|UYDY4lz6boy?327M2GFMS_<Km7nblYWq%
zML$H(rXQvsp&zB^(2vn`>Bs4L^b_=x^i%YF`e}Lr{S3X3ewJQDKSw`Lzd$dhU!<4N
zFVRcsm+58nEA(>uReA;e8oiQ!onA%1L9eFYq}R}I(QE0q>2>rw^m_VTdIS9){XYEx
zy^;Qq-b8;yZ>B${x6q%^Tj@{fZS-gKcKUOA2mJ;8CH)nhNA9G*rgzcb(7Wkx={@v!
z^j`XVdLR7*y`TP(K0yCOAEbY#57Gaj57YmpkI=u+N9kYbWAy*%<MeOz@AMz^3Hnd^
zB>fkCivF8EP5(omq5nl!Ks!1*w&~cm;{_etb-b`+UdM|%w(odx#||AY>6qW~(vBTF
zUe>XoW2cT6k?qOO9lLbw+VS#^g&n(f?B4NW@`{c{9k1-zqvKT_i#zu0*sJ5!9ea1|
z)3I;IejWRF9MExK$7?zc>UeF(^R4Uo*7bbr+FGq^8*ITnn|n;>x(^47E-_c%FqdRE
z*T6?$3J}_I71>RxfpqRGW)x+tqRM+@6k!v$!aRT1=TPDKyFTTPHM>Xd`MW;FDk{s1
z;JUEjQZ7Y?>&%o)lZxzqzUS}yeEzOa>n6|V@A`cHuFvQ1`mErF>gVtJeEzOa?S64~
zljZYweTqv!+jrL|YiicZOe9op^Atk7G{n5vMD8)KH=#oC-r~QWGx%&p%(CKNc&uQf
z>oYB<=;g<9GVaTaj$krl>AcAj7aNIqI$f*m8cL^o{H>Kj9ofz8R>G?)O)-qBm>HJ5
znNoK(O$m5NmT?)E`htO*eiVmtwWCZGqNxZ~qa8(TeFUkw>EAO&;Wl%xW0-7_V&^>y
z!b#Q*dB~mNPu330(R)>66h`7p7*(&xWaYL+!^L?;g}Cp^bq^pfOm^s*l8gLuEW@}+
z-k4=vil}loan+%!4nv!`s4}PKt}N67RE%Upxvas#X9Y0SXZH}sIfq{zSQ;^PK?WaF
z8H6FVezBlJ{VcBuG(r$(H?psg<tZhGI|^7$4I^ds$3Bci#1?K%-`i(1J&Q~D>_D6s
zc_CgXJa+bqBQ?$3muK@NxB<`hEX=<GV{?A+i6dfjURq}ajPT6s>0&i3Yc5fMQ4s9t
zE352PXWhKZ^o~|&^Z`P{8Ejl@7GB_r<D-n*7mb8L$64@@#}qtVVsF-wyzVZ6wSgwC
zT8knW-HK+^w8#PVfqhk=`a!h$AtupvX|}8S-M4Z%n)sSXRrh~p#trSwAzTqUzTe@l
z)&S3?8RdquFb>?s_>zIk=DKi94?+0?A<8Ut&uHJSo5Ui|!R=n=x3%yqKHlYDvI<w{
zXS+}oC(pSu18&Nv#SRW>?g<OfMwta5ukljjCsdFShnwdzuP_MxL{j!km?3KlM$KSa
z`pI-m5;s3$TLOrmqw_ailusC$6Pj9#C%8C=ap^0zfi`v3RhV*?sqt4?scNF(i`%Pg
zxzV8^SfP*4ErXk>R8MnXIi+ex7fh*Ay)48h`Kr69H6j|gHcE49<~AtRgPez$mp?gF
z#JpAeK4eZU`#TThpEmW8KhUd;X$g^Xe8V9xUy8Wcu)w~{)Nfg`+_jI9EY}3rBiVl0
zB}vS7nVpiY#9Bs-H7>V{=p@{3==%8~X0fiUmBu<^)R@c7x?KZ#D8?J+GdkhNPEe#3
zoY3ObnUS{y>G`D>6N;Y?>qY>cD}MCQnz_Bl^fANk^l{{lG88daM2$;~s2N)5ykfP2
zD&Y$$(1{D@S>1f2r<mQE(qyay%_d?M)M`YWpLC>j_q;?IYgE5|T2XaMsAbjQx$lRu
zbH>3QQ>$g@Gv;kBc9{OFS#*6=4p!UD?1Q-ZJ67}%=PDWHk`tqDD=v;14lmZINR8`H
z^f*LnNQh(HH^PFFvl2zYplZw=OkDjDvpXZPLa2Wi%A<uE5%#=s5ra<9|9VtpFFdzK
zT*Muf)2j3io!HLXp&L`#p(!i6C-zb!F7|vJQLfC;tBCAlJ;So0;cO>TC%AsXNip}W
z6}mV1S{!}GWYnDGH%;i_H+_SI?k(|E5SI5~vW6nXLBsew%{s7_HvkYl`<ogp(r9Tr
zppUY%e{9V6(!_c)-V_xd?EH7kd~^vCBxm8`SQg9Gn6<Nj#haY28Eh5k(kKOyJnOn2
zAxyjO>R1z;>ym>gq-plL#fNWslC9x{hU#!={zif@IN8_A+Xq6i=qANw1BTE#7c0sh
z;mcBRzE#KQ>OHeiV&%qO3r(Ux?(!qjW%>|OUEiz`;<viK5=e}<Y9V$DXV`W0Va5qf
z)<3g#NaIahII^^u9-2Eg-(c~dhaV|pxRb}X%hT_5C(qgTnj$-T-KI2Glu%upIU5{x
z-du}@PbN)#(!3f;7C`bN;?)pD%jN^DaVWg1<v;3O)nwJckw;8yt*jvjnEctD&2Eyq
zbC`wCT<o=dN#0!gSujG=8ewEOm@_yEnE7KavGj8uYpx};*%QlEPfZ>`u6apvs<`UW
zeR8;}!B>Bk-axJgn;nNLHu^N_#A>YBL77f5p@v?$`y9c$ovpoC)B>I~=@7%Lx_Se4
zJG!J=Z_XpJ)P4A`6uZN4Nu6nk&9c~YtBun4A4_d3&vojw;Ug?tSBiKq#}^RI8W_1a
zQ*Ai=!*;pGp6zTzf6k+u4gERKWH$BZI&#_2w_m12MGxf8+|nFfxe+M1+}L}%SpPN4
z$kN8!nx=!_s_6t`$|ZIjBe5$?*pSXlTOk|m3_-ZsStNFc;gUMz$AvC!-$B7@;UOmM
zSYTV^r0a=2>(>ath!Q`OBsdnt8um3;d-T_wF-RL50pdjpayCKcC-6pb3C`i2>ttQS
zN6gGz$t7jStGUU(?{&|1!-kth#*V-><2m;cw}y{rak8n1UDF;N3utx-zSBsX(T*i~
z_Q$@48P!TkRlmM8COTd<jG2EUJ39B2otGMUQjbRsrNIloaU5#o-pu)?p>`oyhA@^J
zAWwc?X%s3y5|Qn2mE>uy@lJYwBL)V{rqu&R4bSS~pHXfJpQ@WHWU~hozJ%0KHBINh
z^u?sLx<27CI(*ag5V^Qwi7ru|v83A-icodex&1eNj;4{%;1NqyX5?9q&ClhsJ#cwU
zl;ScE!39n(pUSb5W<(SxmruiRL*!ml=S><3Q|vcfJkR2INP~dwUhx$(SMba+IFr%g
zFF4=PbPA7PR3`}{57vZk>JgCGu-1-e3<C@utZE**pbUexQYb0QM=lL}PhHi*-{IMb
zY7~+jhUoCe54W@Bo+Im6HII%;w7nW_0Pz-4lqb>I9=wOkfo-ny<3g3_{5S_mbc&qw
z_`xucU{V(~Dv<(o{c_1F-bI~-o4zm$P}aI5S62am%ToYwjV}P^sCPmkmAe4o!CL^#
zo#&mGQ>eriKo|(ua27zQ;wXS;camOM38MWUbDWD@EDK82NjHw<y;98CcTO{Y82LNp
z+`Qj#2@U(11^;w~+ZbBL{E-0FmUS%Bj6aaeWClNj{wetF_5^8g+O=X}&yHMGexio!
zf?z?&l<KBV%#&u2&wkgc8!8)d94wm+aRqKrryAiiEiTkeu0t`ZHtQ3m;XM<FCimip
zl5#lpRA$(ns)~KI(6Y>?g%}y4nAV3f)x<3lN*nc1Ag2i7Z_y~jpQbIGa8S(VrGR}B
zqXms-Vn=`pO)WQrGGwzXm-&OYGsP&;;Sg<nMV_oOk%3u-BC<j^edocrjow2iXK{(Z
zTqP!4P-r6lmT`L+E>sk=c2#WTY#5?p%C33|4GqGTZq-#-F{=`}YOcjGXyhSZt}_>{
zoU6V=Gj&zyDCsaHF|i5ek-{`x^~;UYBY`x`W{dMt4B^`}L=SyLZb1j?bed61MfDS$
zG%6w-xu8aeEk?Lu9;F7tsytqX%AyyEA`7{vNZ@J|2_#mLKo*;)Rs+%u$XVj+xKf8*
zu&P|H0c&muA`4Qk7K%_t@(}va>btWiG87leV)+q^ar#fOs3m01w`n->8FVxxC_kfD
zyFg9pFu9&;{vGIu_lMw@H2Kzl^7(ooGO`Sv4Kl-J;LSV)7Tj{PR~5w=bqf`-<*Fyh
zX~-?qMY|B0!aPYcZPgKx)TBxe2`6E8(LK8XW~{7CxG95titU-;#|+yymOuEGu>9K|
zA=!E#g4UY8DdT5yGa)Ler3jhPD5R#NI%`JPh!RUK4l}Nn*>)w5Fq23_YvC>$B1;RL
z#1COytxCdX8|o+q)k0Bg2vFm{qgDV(WIio?=y@g9ONEOlND&#BOiQ6=EUn6do1efT
z$0^f5T4-y7nDz?iaSvbM96B;UC2yr9GK6Nn+$PY9i)Lk|$Ro37Rmkj_MFVB#RXlej
z18?1db*SpVs;H?fElP|a&?WIbcvBnlz@$zMSoB~HWocXodpITPk#@8P<#rD?RSee|
zX|x=K8Dp*-I#1TBQ3gf5B`*=(I63g5^p#R;CNxM6ba*yLCBP~>E0Zv;hC3Rt!#yY_
zx9`B!_Ukb@4&w(MGaWuNW7Qvzsh2&dONg4Z3RV3_waV=tRe`46l{z%H39%X%2bWc^
z;>?ECaHPR-AYvspz7aOTih#8MO%Ry}2U<bC!Ml-ffs36Zipg)trpUCBkyqC!`wau&
zAQq7V;-QT-hJQPnkyk|2=haS!*s2PXAbtk(2~Lc%3i}<K8m>X_<8-u4d4uFSsz4YE
zhnrH>9IlQ|%b{geG3d;Qe#;qm>1Jmdt5dy-fo_l{CPzrlLBnv9M!AO1z#P{Qb3<HJ
zhA5e;w()J?!C`hAIk7i_Xze8|0cmO*=xHXj(JYMD56vw*RoWQ!>u|!vEf~+ERtR|@
zM0V*WwF+gtF|DG+ZGnQ&G>XXlG;n6gbDPdG2aLqLqH`@~Fv-kir2{Wj_U;kbWkvFE
z2F3yq^Wwl)uuB&Fj$*_ly0I(jBKtnxl51-L-29cPOpoDHpP4c|Nh4MvL^Sy@VG2z>
z)67-)W>QU4_AAn~6-uIe5Js)xPiq64B_d`ETs35k*9!U{5eUmyHnEO^4J5jrh;5`L
zeg`^H$%wziyIt^i_zoci=Fi}zLb$TthFRQ&Z-YY2V#V0SufX&)5@>b^k)p8uWNM*7
zX;AYb32Jb;zKK^yF673g(hoDt47p`ztp*y+uHZG!h*X4y`Xj9iqu;9GXe14-LD2_P
zXD3V8%i)z56fBU?IeHsmxfaz+p%<)x;#a(Zq#$Lcq2XV=e4^|Z_MKWfS@|e4Ef6q?
zZUQQTi{uy|4BOO5!^|1xi^<e=C`mF`N|;yO$TOlr$F3Cs(`7OTjWp?$e(U(avoSWI
z<w<15DU>V|O_`yVoOh#0jk3tZf3d+LDq5{D&4>#VO~H#ffMnvfi|XHMe7PeRgl*Qe
zv{b#a`efAu9)|7`8<^>!tgR!pJS5eSQsq|VF~OK({kQCWG(*ZTvs4pGMW}~h<eVel
zxT$35pBY3(Nr=oNRfT>{L7GH{3SqWJTA@gf8xt3V0i@<z$c&gr%8oQdjyeN3bPU60
z=8RFKBICwTLE0}yI+{Uaqz1D^MJF_33>B;@qE0DAr3oZPDmAg5mcU7uvNs5^+7iV`
z(U=jdv*5&EVw))VTV>{sX1;Qv9#&W%;T9Miid07e(}(`sV0CaZlfp1e{4jhlGs4Ju
zX{YJDLUCPTIEGQt##m)Q(B$g4pgEvJ4;Q<H85tyO>cPzdS>z0ZKvjeVCM+)TNcyb%
z=2S4INP<$#grXUaEyYr<5;QaR3nMoWnE6=}^$vU0_N?7E)oZWABJL`k!_+N|s$b6$
zaCDQq>UGW(HLa{ss=nN*FirK4FvrdTD=deY55&4*9#d3J^6)!&%`lAdn2TkG2Vo&t
zO=hIwkMK1H62<6_sd#VyW(t;}ol6+BQK_vP*M%Nb(>WeL9=0mg!-?9s2v_{p%E)xU
zwwG~Tw$dR8<;XBDibEvRV}LF~HIoZdqRV}Lm}y*`H+g%s>KyG0h{&=&D!8Chmsq5B
z(&0G}>e`A`TNF7(sB4FtR3ZYAd0QB9V_Qn-DMCHGcw@FOK(-VbuHK5^LXjRA69=Yi
zn)kqHEfR(%Y1-Asp;X*1jGkUIQG@qTq*SF)hCB{dAmAK(9k9Beq58$&p(&c65*A*8
zeUw9nE|$M5!mHHPZ&gK%REsAy1jX)169+Xc4AQ*n6k+O?O{#aXqZAXiaEz>(Y>$c^
zlkFH<FojO&!mP}RWMZ}?(h*7ZEMsy#%OG)P8RV{HF|nRyOk&>-8<S;Fj?c2t(UE1k
zTipGuT9(aW?uA^{Z%AbV@7<`%Yh$FC=O6pAu7tPJ7=x};V4nq*u4Toc(Sd>DNdM^2
zs$x%SBt^ClTn-+eoIGUl;>Cw{9ll~oSLc!?OYlSa7yMjy_zEdKe*5t=wjoy>*Hxa6
ztMKOG8dCP};PLYM*^PS_#4m!Ix(IH@(*GSvcI-a5GG1zw_U;T^cE%sK0ryJ*t{THl
zUrTO~dR<bC8^-Vte1RVOc_dkULkFHakr%O_WCt}-`;nb?d?AFlb0>lT_90!TE*Pwg
z)oU}g4UNV7=MPvskR4hj-L=OK_{CzvKyZ^6`)f!1{{sG(N`qL8)uyHq-L6w9{Vy)a
ziu^#Cu=sV5Ak|0W0#+uO2qZjxf=TKuV>(cYbe2aFCf}3&C;zdq2yRJ>{&nE~GXf*e
zXLG+xe05P%+)QohTz;rhTRM1cxfbyf@padh@hB1eFQv9D4V+kQxj6VCpRBg9aNA!C
znkm^=g1IP~mzCwoTUqA{yOrGw24B8;TG?`$MZA@=R;K<tk{lVdvSopswdOm6AM(lc
z+MmD8<n11aLHp|D@}MhQeEjx=u$DT02J6Kb3v1cdT|a82XEw0BCvCRBT)s5uM0GaS
zdZ}dV$>uZZkz!A_kV}oE`!$Z_b*F2YoADqpc1T8cMN98+KAkBRvO|3XnPP9Q*pnF=
z&JXJ3NmIM*FgN!rz`c#+9vJQ`4rEqm26U>#Bhm6sH`iitZEJEBd$R)>oiK6Qy4-xb
zgYN|j-(ae+sx_wN%iT<`2Ge$uiQSLTJ(|UZA;Y<m?C?-Qr%ariT`{?NAUIzb#~EA+
zrDmlTk7mY*<uUbjFEGzjnAsTZPOM7b3}aU?C>cG#0bqPl45KrtiF0{b(5)NHi@?0S
z!dyrf2eRE-R%$<*Nu@7eU#xJgNo7Zh>EXe_)KHH`qWx%w(bxKS2pDrC#omF@LaV);
zul2t~auqW9Y-%98E`#Ld)@sSoel(LdUvAE~_{0?z8j1F!DaM$-bRX1}mrBNw{(L6Y
zQyj^s(3OkpwKSgOqsOwCzGpYE?g&<NJFEKh7%%YBuC}}6Ouig_8DlycFDg8Fg^^Tx
z6-KIcE%x9(#s!RVv?o<uoh@X$F(US4R%g>0qgz!|+*BFHj2U|)H+KRfhWwNu>&@iz
znI3uPi%O&Aa#Nf!z1yq6xidKJ>+3YS#C@7?KDA50H<TJ!i+0$b8tT&$r2S|rb)L~;
zSIO2rfF3O~)R!H~6i3$PwqmV$xnxXZ`ZSp0-Ap^`Y0Ox>7$vw+GK%V19M1P-k+)hl
zYCoFF)0j5T<{P^Oe4~T8BF|XSw;2s1d9Cr3<L)YFdblT39PT~}Gh{7c+K;AE>1m6v
zkc{$%!u0TPuGp6!9?coaR9i|@oSwG0NayTdQ^XfDTB?%wslJ)bD|NnsRK71G`kNG+
zK(@&28~gV#nA5Ftd1g_s(z)0Llxm@AKbk5*Ppw(3bE)@Qw#MmsXWY}^?8_Ag(L%Iz
zC9gNjJ*nJFGV)nlad@bhIVO`HwX2Mp<fd}glgd{IoctZNYgsos^xR0k)l|mRn|;yG
z?X5BLj*d+l+tHhQde?n4R=m@7E-5bUL~2`2>EZ4-$o-JgeKpRbM>C_DVlF?77i262
zw8*<GrXI0(<ozUXwzuC=WfRvP&-k%_z%-ab7uP-7i^)}SP0FY~Nf~`T+yNRRrUEVI
z<8sg3|3Jw#n6j^sRnyeoUf?wjR$h0u;s|+=!r7lm<s6MM`Mkq3BEMGUTT^r;FFBv5
zBo9{j@|nTmkxX$gGuW0?ditD06xPCMcWPh&cT3tYS8Yw{5`}ZLum*Qowv9EW$6;?+
zhbpXVhV!fVjLwl%wbe8;a`|Riow1xn98Gsji~WL+d8wD#SpG;{x<2M*@ytuNZkm^S
znYU(|yS&WnS}f;fF{9f;)at_m=4@^-WxOyXt;xQX<`t4>XmpS*59UVEJ>@Y)FgpCi
zHOV*EJY3;qU13iqml?vwMA|6I#Od_R{9Y$HbJ?LGtiTLrhl<Cr34`4wCT5K(MYgj2
zdY!dhjGo?bWx$9+#DZ@>+dF8>I;CeseS_r8r`8llvV$3{<P92CHhKN=jHpKhe3`=V
zDBIpjr_%jwlgnH>Ow8)*|K2EB3z-47kEb=3V%AtxPt3gBQ+JOHn8hxh7*%r_q<qMD
zg-Bd?J-M6;SVvM=W9tiM*loyVUw_mcvSLwqa5z^eV%3dpUbNWP`c{S0l5?~WEC`EP
zp-!1NZ#}KFM`4tEvxRit-WE>G>FJ9yD(Bjvv|Mg8I{L(no>k0V$;g&W3Pn^{G|@q$
zp(N(>%z671KJ2Uw^7Uc1@~#(T^0w(oV}HPjndiVTc4-PZtV`>JiPPvQ#jNDxLy3Ip
zwVPy8M&G{hQDE%P=JF%yB5NS|(IM8y7e|M>M~8X_GA+Ju`}%@af?3)yU@e@FWLVME
zvp0Epde)={BxirN2Q!jlZ$86c*IH%tjGlu5<8XnED*c(f)vdQRg+r1rJCq&4#woTm
z+A(ta`qyE}$mV*v;o*T|VKkQ;&bQi1Jf)a38ClI;ZJ50z&sEQQ+tHG-i6)Yq*OSt`
z<n?Z7CT8@_g9?nXkdhnGsIjR$GPf_E!j!tudLDcF_Yuj==CkMpWL;`~*5_M?8<m_`
zK0#YS4Ia*~ZO<OOujgA0R%ZLdx5dQlwk`YqzE--%=FDdbnO0k=#}4lp$p{iwa~$1J
z(s3*_L^UjL*sl26fNf;7n=OT-#ts^jpTzCNXX&$!@!`c5#ke|?FIbyziPPrk1>OWM
zY=U9IZB1%zp@?$JV)QbKE^+F7d;Z5t#=y3rU@eEYGktHCY?;Asyu6F|*`hg>OrAc^
z+s|>5QP@wW3fVMrm36YM@1e$wBHXrEvJT=M58c~9)?5-vp2iq9wnuclWXt6<!Lo?x
z-|Puxa$e7zxg>e>*w$p`u;}J6snn-fNo$X9Pij5*<jOmLcW#ZzH(HD_CM>k4(j(c`
z8UEhNS44(KjpVg8KgR>sqX)!`Ydd;UPunO<zBKHMxm_U|2`Kw46P4tt^t}0OkgU>r
zLs&L+x4Pfwv)taOGoqQGC+}-{*U!^3CM09uz%aIKis{i}>vqPz9nFg5>mMA1y=JQQ
zRbS70&IytW_Af;?S~x0cTkFe-l2263xXEJDe#U2!IvFso%CbI<*%e@&rN!RHvo1KL
z^2tGw&5PS$2kUA5)qr(vVTA2tf{m@cwxjiXW;iv;%HFSf`wDGZ3GmI|rty72vAD51
zTinn&QLL14pp{x#v>(mvC;GVF5^_y+ZfFD7a?hw==Un)_g&pBtnw$&{H!tyn8nh%P
zZjW6tv;F-s+n5P?*-7DkVl@`c<gIVFsS)r7pZfQf8aO7tgagA=8tq3@oSrw;S&K6`
z2t%VxoWH)g$0n0go}#t7RM*n>EG$Mmqw-r#R(aOCPN}B4sl4?tZZ;X^H{LBW#?)N)
zz1S9$(aO@q>GSmJCk1TLiFfU)v7TQ2ZD2(zisS1ymZmGkjpash{cL4&yeMvj2)~AV
z7PZII7o8l(8r(P03a=-%Z?}2F+bq<wOrBcbp8h+)iUb9BWoSg|kEU9Pr@o#ddDutz
zOMB!gA1Z;?RGMP*<mIUv8%{D{=QPM$J~YzAy`68we5b<NjubA7sio|#{4|9tIHRJq
zl=;^F-led$H8(veJYDDHM+>%=LSNavTd*m2wP+Pm`_WYG^t6RDEG`^jp(RcI(G;6!
z+<%Y77MyyfvLx?Sd|&&%*TJbDLf=|?eJ$gCf?K;Ur?r&&)*Rn2*|ghuG&=Q1Q)TIy
zXM8}iSy%n2BwE%s#ptmLI8$ZR@2uHsoF7y<<xEOmQq&TsZ$0iT$r;{&)EZOF*u=g@
z|4_)(rhYqyi+!zmcEn{}-lQck@i^#vyZLa)YF#v>k*YtMs;8cJ@sEU@7@w4Tr&NyQ
zeVlJ^>Z2;7dT)qEss3mxjh=Di9F0-lSfvsqZu!1`^kXWUJfln_OI&As<L|jDn>cAn
zBU67gm5-jcnUAYn=G`tTQR4jcjTGmFtjaxQt+9DlHa`)uDVLyWbcxH)*BU+<^5Mj)
z=cF}!N@HwCYw+1Sp0BdCx6XJ<@zW72u0fip*Q%A1j3vM1rRZrH7YIH%FyRPN<<=&x
zF=#)UX^%eZjn6n)ql5BWthJu;@j{K+GrdWk%O2LxYOJ0KPI6YyOzxt9mERj!s)}>T
z*=>$mVQN2`D%qG3i><GIPUFNWWwQ<3S2<l;Uzud*%WDOz{%Do=^D3`>5u!?^MWPvA
z-#YdeR9^q&Cvn;OX0I2kyqExKiPC;Fl|s+k<QEO5=p=hBQxm7oH=n%3;8d=k)JWAI
zO{LS*M!#fm#tvzTQ|jA~xYS@ZZq8H_n7CBCVrI{;K|B4j!5dx_6y8&yQ72Bb&j#u;
zgInA%*&<&|TG`oWUorS_%sRW3LCeo)@um>d3$nHP%MC_;VX2lt^+!`R*E5EH)nL<Z
zLsf~i6gS1{S(&)PWKCzgXJ>GwU`vd?S;N;1M(?C7agFVY=>uPj^1RYyZeTrTDmZpo
z+!CuLJ2AWOJ>u&IJHLZgqf>u0Rjx5<Wv8%QrLn0O+^Q_e+l#MczoBv>Tg~fpTV#%D
zscgP-wVOFO09PYce>9cLo;Lf<INq6-$MTpJc4ljLO&lw~R#;76d&~D*ZeF}NI$rz}
zw;kUq!?h|iyMw9KnUZG)@okmM_fniVg}#yeI)nAO9Lc|9aBuCA{Cb10JtMiVXZo(e
z*xoV6x4v?N#vHd8ptjkj`XtZV!}nCiky@iPiTu@v^TajRXW96D$!A^wuBIt@KjgEG
z`$51M-X-3KnVDyfbfd+GYYy1m<ysQeA5E2^rw{sJ$cVDzH(*5<qpLj0^VQeO-ej=i
zG9ryk{n1nkJ+t{ASzO{OA(bO}>U{5LH;0VQ3tlv0^+z))_2uf11uwrMqaD@-KD(`3
zEUxGV7Bx-DbJn-x@Dqi#O?zXWnsckdmYFO~&y>eUDpReluKFySf2uHw3szdp%a|D@
zJJa_zg-cwZ-C`T`Scw13;^OzUXo*vQG}97%BgO3spK{}jp1{Oolka=upG(Hg4J@t=
zRH`Sk8+=pM@qU=A)^I12PMtWdp1i#yWY(?>Qz<m^X7aWxrrs<;z4?W~$8Rqa_mrs@
znYG04vo!i;$S<y4ZCAhJ+uQk7#Hrl$CNF1eDW$$Tb!W&bE<1{Dx=N1@mS#@WidFm3
zOd0#~_}2zA>a>e8c?*i!eK;8P>aLJe*-`?rxGY*;LETbbFN>MW?gIAT2zGuCuSSwM
zjlNa*yCW8H*^ox1{%EEIeN4YqnD|}BYT$|Mitps@dm<)&C$UD9IQM+D@^=xF&r~cq
zr)LguujGU!JnCZG_A)&00Ka!L&S-_E{b;7H_(se7B1Wu2v@MOEHN!t>jP2Vw@$J0b
zuW`2bz1%lv{iERR8$kULMlwcGCa<qC{TTaR`~k^^4tt_ht(Mu9*kf94c@{JBnD<9!
ztM;deZ))6rIZm85U!V41$mKFNQv02zT7)MT|7<Whrf`W<=G)<V$lz<oDyMJ1^nVP#
z=+1V%swB-*Upsg><jmCTwR#)siZ4h1S7&oAqa-dtU+NwS`36ce4W2?hD-*N%`m(=-
zZ2CYb?lM+OEqQDAtua0t@`@R$Xos!t;rYg<zebGwdhy_v#}?BVv%|yoVjnXY3zk8Z
z%H2#)<D29EU&tn{d{UFRe0Qo`Z47VN(DLqsZ;$eElUW#0YxkC_DNSCJx?*;g4n;2i
z&Ced*jM`!hV&+-wzOlc%V<3yc9yZmOG*jxn{`VgyH|}gL*UQy0V{s`lXH1T>y}TzR
zC)=S#D@sq`J2}4hGsLcrRhg2Pv~Q*GpE_?(dAeL3M^Y>0mghk+rOVDReNt!5Oieeo
z*a=lr-AtMKR(=0vFm6H{7VB!Q^ZM2%o^tTA3s>8~>Ul5vx6aDm{A|MHW1i=o;c1;w
zEWWgZGiG+nzK-~h&WSCO)Z}DstR1W|D<SM`@MkPmwk%<jCat}`ckBOJoY)MqDU+9|
zCwB?uUzYMt2tAV6fMT478rwmMS$*RkjbrVt*QN@5lF?#s=-Ykoh+`kAwT0VP#@krj
zm|(P_jD6qvZL4tVLk?btdCBXUowQE-c2i!Ua!=N(t&VcOy|?XDF8=XdYw7Zp;|mq8
z9(JX5PpttXD07n`ajWsY-^|lEXBzdIVVs?q%{S|Rk<HeQdKxpjFpItI6-K^+8V%j3
zs9d5KXyWwx`lJ`fvtvcBE$lI4IkWBE!DJ7n5A901hx;XQ-0ezt%#4`5gUnaCH<!os
z{s|GtmFlIbmS?Fksb`kPFEwJ&o;_{fd)JNzqfyt}mFqF1{vk;7%Um%CYFxYcWAdEU
z_5}uiFm-KLdHdS=P6l^0iG42EcQ)9g30K?LJ>|a3-0UsS`wolg;SWQ(?>aYs+h%}=
z#neA`+5OAsjz^p3g1(x#(2m87m<+b7F8J)~b~D&Vr^ibTV|tc!Cg`(>-(BHjgWC8;
zbMc{_Y_HJS*v!QkL=v-kRyr2xZ24M^&*@ubj2SoCO42J8#-R5^uC`~i^ziQC;B8xN
zjcKuLlzx@Mo2||?O4UZis;<fBgE2Ne>{k25ZpIi>m!!3+D`xG1Sz+v{Fh`?Px;DKf
z+!t=KX8GP0_j0nMZiVFfBs_7-WA^+GK+0bo$K95C?MwCEPG;2X=OESl#BsMJ)xJ08
zeHCV5xz<|Q`o^36EUwn=4}GgV`zw6vqG7vwN#Bmr0XlcEbQnY3&K7i_&MX%YV~CUY
zm!7u!8YgqR#s*(WALM6leMTFT>ZRzhUhC&>+iceNK6$V|4(*!B#^eI)w-3={Fe7HM
z&qb|U;$&`Dt@G`v9jY+1>1A)FI-aej+pxyz`?jgmU_~CcaW~J`%9h46yC%Dd`xf6`
z?J}ENoZHaim&?BO&*e5JvnSUF$hM}k%V0z;Zci#b^ZCORPWb`t=uElpI>;??T75gU
zD>UAz+NN@RHBLG{`gUp$*Lc|{v~kHz%IceEy-r~rot~_fTCBIe9^&;1S7mIbZy-3m
z+GW1o)-qig$DExiZ8REA(%kj+#Bb1fVXPh)&={{t$x|ECa@flC5jtx?i4Iv<%ggW1
zTI??li?NdGL=4`j$DlB~-ikyUEZluIa7XGf7_CmVG4Eqm1K9d;%H*w=H&$kF4pxu>
zE#|tf2k92v{bOT;ZT=3$XLp;Htg!Lm<_~%TleQ+`w*);AQyad-@U`1a#KfmD?8vM3
zv>M+oYOluH4l7^Z{&!!*CO(zY3odbw<V#t9#1-`UEwcHnd$SQ+bl!Zcd@*|$%ns-%
z!IzzyuGJg9L+=vjsc)XL%EcOU=w0Gg81qKOzJ(f)yw!^F*<#|<`D}>?CCl^}-dkqM
zSSIN&RmUe=9jiSn<wGGOnmph5Yzt$|I__fR<gm)vt`x@Xaj~;_av|R^tSn=bn7kZg
z#zVF`eze9JlgQ+ZF>?=Qp`90ua(TKmRfhGf-uM%@hM4bN*>}<f!6ud{aj;xtE1rfp
zqH?8cwdryl7u;+ri=?)e#=~M-JX=W`_43Ak)|<FgeXHQBy>V!Lz1BCLtWmkymR2vm
z_tUdGaaw&AT*tWh>?9`U^o`JKRZdrhO3db4!B}UrwIh8o{m{!%hu<U^+cVRN>0@^T
z&#@|xd2mVcJoNN)Z`PUG@IK{RCpb=J+sa%;jjQ$VlYQ%V$E#dyO`M(O=r{r<@t6@a
zUu0iJmQ+sGpS0SScxuUdl}p~9rPC!%neV-6Ok=A|<E4ALJnpD2iCcoNZjP(0xk_Vf
zqQw-(jD_qA+Op(=t^fFVy*xA1jyF-ytaO9PhjoN<eN&6`VBh=vMul-xWu{f8m~|?4
z2hl{t#J-!@$}ORyv$dg~_;T@tkgXkOE%|moP7Ikco7oE}Hq|CdurXghS$4g^ByV}1
z5}XXV@y;X;1lOBhVm9BrXi8%f&95D&rTXTbRgIHXQ+}r;zD=mkPL}E|pF-}l1+2Nb
zqtCI~c-5XV_Ka53YAo7ti<Zys@GT*iSWMxY)a|X!o;hMYWXC#rtMxdBhrJWes62dd
zbWAvsw;azbpb;{%=}kL&B;Whetj5+Jt99S3W0S@wdcpRr)^^3T3udYQRtIal`(EEW
z)MkypZT)YI^&k84aErz0KHfd)jM8U8a+1cmwR#0#?RuLd2JNX`zS2EeW5s@Zdq!7J
z>Au}!Y)|R>EU(|8agN}q?va|_@g}WHzC1l8Vyokedb}rW?SA*vh_O*e6SarW#OaIa
zk=Q=OJ3}@;D`H)Oy9kqyEio&g`-1hfcvfSRJ84>dyI1duSj&^;lKGmFIE6l&$<q}s
zW|zUYIr%iuHTg=MO3(cI-63<kdH~=4>KP#$-!9i1V&WWK7Bd54t8woMnP3LmDvxJ1
z?!79Hc}Z&G6#06x_vuWIjwNZm@a>Mh-@#@tpth6o13Dv{m^+qr5;MlsefAy6nK6ux
ztW8{!zW3=5`grY3PR#AI;5aLW+onyN)1H0E59zGV>wOdRdB&-;oqTO6*O-+%W(oY^
zcvd@mlb5PzX7v##vrU>bkA3?&AJsW+vcxI$jU4BwY+H5L<6}CLqtYbJMPEOAu7l0a
z%A}0Ga{IW>Xp<!`x0tyb+r2(dWot)XdU}~pSX_4E66a)0FT-|OKB@6FYW=OwGdx_M
zQn`8>wN_84@qMjyzRA^wa`Vl!KW*@}nvcG;U0`yxA#Fas&!~Lun$vhjf(xB|t+xZ;
zTkmI0&i0%n6w|A)H^PhDjBO}O-&@+}JgjZV;h5IX-n&2VX0}O__uVn9tbhB0%G%$k
zrM6=KcCo>=HT$<Onp|zDH@-KwOH@AoU90(`m~>R~?O}b%!RAO{TWiXt8mDz(Z(=TA
zkNRbk%aORm`RUo&xJ>11&znokI|SSP|BBAmo}Sb*UR>_tYeSCu?25k{%V~FLiAyqO
zt%g~rUg6@kDU;XMm~|+&6ZbWhlbxR+4ldq`J?WJyV;fTETV?xt$TeeLRG2syeQ$qP
zsXS)qnV8AfSARoiYC}u#z5QM7V6%%XX&dmp{e4qsw8;|Zrmx(tQQ5ZYTl=?krmb9V
z*E-njtZZkweOqU=$&!~_%>0_YwO^;QwIeS*Z|&c)xa`Cw&dHb&Z+E2adX=x|&@R0z
zPRiu7QuwaH<jBb6rRGW54I1Cl&aIcS?-@*ONm<Ou&1{~(uko$W6P7p+ef|FrbS6i_
zlGYI4%Eyf=n_cabr!3}ej=e+vP;zNDvo1rO<g770342q&$;Ik4*h!kNzHf7X<YL8c
z_13<h;%3RqKaK?-`;4Pesa5p~n>eMO-t)%>W8Y+LeQ9#*@!ewZ;ktsBzkc$a2lf+{
zPkaN_?(dRf+RWjoy|*^O-oCGsd~fwXwPVtrZ)iMi_qI4re1Y0p4fFMhKhs#P<06vx
zPM){z+fA;XQlq3NEjgd(ZSdzVzRczp8CS$u_aBKGe22<7JT^8vU8;_487ys1p<~)q
z?uNPXi8Hx!o!8>&dTk6J9MoF;!tn5z9LOR*zj#*hi8**MaoPB6p??_{vs|r#t28R5
zNuF8ubv%h<u_C5FKLYvltL9@7lPO6@7vJ0BooZ}GtM=h7iPL^qOn>t_r2W@|?K!HY
zD`NWnG-7gB!kEMqMdIA?Sp)yZj#Jxfrf=Q#Zo!``mrqPJVlEj=TK9b4&HYyL)o`&u
znVB}mm>MSLj45|EN4_VP(=aqlT(Z7*j^9bvdTm2xvRp4TO7&L9*BGlO_D$5iCR?rL
zFA-wcn2pu%1zT_?$4I?2R<5T@lauR9W5xx-$@4U3+++7I-6wf*^);?cN9|c%uFteQ
z6Zd_O^#{QSGs>-fJLLTaQ#;Oa@zu{ihJ3TrsrtrQb~~ItWl7oweKY<CBpa@wD@_Jx
zorI^3%(UT++_xh8C&}Kx_igM0Om|gHnqHqp-h(lW_HsaCR!>j&XN|SdD2+|9gJSJZ
zp_8}Dm~|Ys1M`r|+KqFos_D{nW43O50-BuD(*yla9H*VZiFti<r-xPEt-j9kzcyRU
zq$FuA@}0N$h?^Cc0oCje7Zc~MuTT1m%G{pyq?orBwu|?u%9g33S7=ME^|a5wx*6M0
zXMJBHJ*Ki^(3I)4KPpLFmcCl_fAOqsNUv`v`f(5QrWV)Fec$~4&C6>iI%%8r&Ex-W
zaBuC|%0CRgw#-&MW6TpSK08Yj*I?hQ;hzSly8}#IiZSoXY+e1y7)CpxiCI0f>VK)M
z*%{G2RH_?Wn+^JAM^D8tww_Mkc_05)S&yDA*Nsnz66a~miqB${<kJ>c>uK}V)&Hn`
zg)P;wiF&Qt@*UH@GJGb6vGsIz#k7zm$k+d>tm1+nIh?iev}I4Zk`8{msId28_lr5M
z(Mw#9Vs?Mox6{<g*LoiN>QaZvxw<qtTW)#h+0z4U<7Dhw)?yFj8Bey=7%{CaI}@3F
zJn?Y8z|CnUGdZWHzHTQtv7sjpKzF^eB(ATXIrR$-KIbGpaSDC&%y|YQCb+?w$slj_
zyglEkBQJ_)Zr4roF&3=*qi5LOW^dmu^PUoZamb4;Vzz)Q4%E?`YT^?1y}#_>X2fCD
zEw3<mb`f7<vexS*W5UqZRL(c}x@+}uEUMNf&5gU{te)}Zr3PznWfJT1cGf2DBRn~~
zqscj0+Su|+yr&O*naNkHPnB9+>-5>FEHL=kzHGHJQ^WPfEq}G{`;uiRlb0I>wW~yZ
zeeljsW^R|$T`9F%w!Sp)63?8TEKM1kAW3VnZ<S$JC$sz5!lb;u@3LO5^0MO{+SC7f
z<}C}|jICR2dsfPJb934qQ}W)>(=+XEvTo%*{}l$`bJOQ9GFaQyL-?#_Uukk~<vxE8
zlW!~c`L8ngo~u58vB~?K_4#``nV*Y3f3JAvt=#9o+R5CWKHs<MvA5)%!2acUX7kuY
zsk*T&)*$5ShutwI9&=*eHQ3qs`w0Hg>UvxWC*Lyk34dZX&l|?Rk}bTo%k|Qlyx;e%
qKkR2R%8SFcUK;ne8QBq0I$`28`gSJ{5R81yYBT2e*oXJ-zyCjVm_sE1

diff --git a/hip_runtime-sys/src/hip_runtime_api.rs b/hip_runtime-sys/src/hip_runtime_api.rs
index dde6f45..f6ba671 100644
--- a/hip_runtime-sys/src/hip_runtime_api.rs
+++ b/hip_runtime-sys/src/hip_runtime_api.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.69.1 */
+/* automatically generated by rust-bindgen 0.69.4 */
 
 #[repr(C)]
 #[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
@@ -87,7 +87,7 @@ pub const hipTextureTypeCubemap: u32 = 12;
 pub const hipTextureType1DLayered: u32 = 241;
 pub const hipTextureType2DLayered: u32 = 242;
 pub const hipTextureTypeCubemapLayered: u32 = 252;
-pub const hipIpcMemLazyEnablePeerAccess: u32 = 0;
+pub const hipIpcMemLazyEnablePeerAccess: u32 = 1;
 pub const hipStreamDefault: u32 = 0;
 pub const hipStreamNonBlocking: u32 = 1;
 pub const hipEventDefault: u32 = 0;
@@ -444,9 +444,15 @@ pub type hipUUID = hipUUID_t;
 #[doc = " hipDeviceProp\n"]
 #[repr(C)]
 #[derive(Copy, Clone)]
-pub struct hipDeviceProp_t {
+pub struct hipDeviceProp_tR0600 {
     #[doc = "< Device name."]
     pub name: [::std::os::raw::c_char; 256usize],
+    #[doc = "< UUID of a device"]
+    pub uuid: hipUUID,
+    #[doc = "< 8-byte unique identifier. Only valid on windows"]
+    pub luid: [::std::os::raw::c_char; 8usize],
+    #[doc = "< LUID node mask"]
+    pub luidDeviceNodeMask: ::std::os::raw::c_uint,
     #[doc = "< Size of global memory region (in bytes)."]
     pub totalGlobalMem: usize,
     #[doc = "< Size of shared memory region (in bytes)."]
@@ -455,6 +461,8 @@ pub struct hipDeviceProp_t {
     pub regsPerBlock: ::std::os::raw::c_int,
     #[doc = "< Warp size."]
     pub warpSize: ::std::os::raw::c_int,
+    #[doc = "< Maximum pitch in bytes allowed by memory copies\n< pitched memory"]
+    pub memPitch: usize,
     #[doc = "< Max work items per work group or workgroup max size."]
     pub maxThreadsPerBlock: ::std::os::raw::c_int,
     #[doc = "< Max number of threads in each dimension (XYZ) of a block."]
@@ -463,140 +471,234 @@ pub struct hipDeviceProp_t {
     pub maxGridSize: [::std::os::raw::c_int; 3usize],
     #[doc = "< Max clock frequency of the multiProcessors in khz."]
     pub clockRate: ::std::os::raw::c_int,
-    #[doc = "< Max global memory clock frequency in khz."]
-    pub memoryClockRate: ::std::os::raw::c_int,
-    #[doc = "< Global memory bus width in bits."]
-    pub memoryBusWidth: ::std::os::raw::c_int,
     #[doc = "< Size of shared memory region (in bytes)."]
     pub totalConstMem: usize,
     #[doc = "< Major compute capability.  On HCC, this is an approximation and features may\n< differ from CUDA CC.  See the arch feature flags for portable ways to query\n< feature caps."]
     pub major: ::std::os::raw::c_int,
     #[doc = "< Minor compute capability.  On HCC, this is an approximation and features may\n< differ from CUDA CC.  See the arch feature flags for portable ways to query\n< feature caps."]
     pub minor: ::std::os::raw::c_int,
+    #[doc = "< Alignment requirement for textures"]
+    pub textureAlignment: usize,
+    #[doc = "< Pitch alignment requirement for texture references bound to"]
+    pub texturePitchAlignment: usize,
+    #[doc = "< Deprecated. Use asyncEngineCount instead"]
+    pub deviceOverlap: ::std::os::raw::c_int,
     #[doc = "< Number of multi-processors (compute units)."]
     pub multiProcessorCount: ::std::os::raw::c_int,
-    #[doc = "< L2 cache size."]
-    pub l2CacheSize: ::std::os::raw::c_int,
-    #[doc = "< Maximum resident threads per multi-processor."]
-    pub maxThreadsPerMultiProcessor: ::std::os::raw::c_int,
+    #[doc = "< Run time limit for kernels executed on the device"]
+    pub kernelExecTimeoutEnabled: ::std::os::raw::c_int,
+    #[doc = "< APU vs dGPU"]
+    pub integrated: ::std::os::raw::c_int,
+    #[doc = "< Check whether HIP can map host memory"]
+    pub canMapHostMemory: ::std::os::raw::c_int,
     #[doc = "< Compute mode."]
     pub computeMode: ::std::os::raw::c_int,
-    #[doc = "< Frequency in khz of the timer used by the device-side \"clock*\"\n< instructions.  New for HIP."]
-    pub clockInstructionRate: ::std::os::raw::c_int,
-    #[doc = "< Architectural feature flags.  New for HIP."]
-    pub arch: hipDeviceArch_t,
+    #[doc = "< Maximum number of elements in 1D images"]
+    pub maxTexture1D: ::std::os::raw::c_int,
+    #[doc = "< Maximum 1D mipmap texture size"]
+    pub maxTexture1DMipmap: ::std::os::raw::c_int,
+    #[doc = "< Maximum size for 1D textures bound to linear memory"]
+    pub maxTexture1DLinear: ::std::os::raw::c_int,
+    #[doc = "< Maximum dimensions (width, height) of 2D images, in image elements"]
+    pub maxTexture2D: [::std::os::raw::c_int; 2usize],
+    #[doc = "< Maximum number of elements in 2D array mipmap of images"]
+    pub maxTexture2DMipmap: [::std::os::raw::c_int; 2usize],
+    #[doc = "< Maximum 2D tex dimensions if tex are bound to pitched memory"]
+    pub maxTexture2DLinear: [::std::os::raw::c_int; 3usize],
+    #[doc = "< Maximum 2D tex dimensions if gather has to be performed"]
+    pub maxTexture2DGather: [::std::os::raw::c_int; 2usize],
+    #[doc = "< Maximum dimensions (width, height, depth) of 3D images, in image\n< elements"]
+    pub maxTexture3D: [::std::os::raw::c_int; 3usize],
+    #[doc = "< Maximum alternate 3D texture dims"]
+    pub maxTexture3DAlt: [::std::os::raw::c_int; 3usize],
+    #[doc = "< Maximum cubemap texture dims"]
+    pub maxTextureCubemap: ::std::os::raw::c_int,
+    #[doc = "< Maximum number of elements in 1D array images"]
+    pub maxTexture1DLayered: [::std::os::raw::c_int; 2usize],
+    #[doc = "< Maximum number of elements in 2D array images"]
+    pub maxTexture2DLayered: [::std::os::raw::c_int; 3usize],
+    #[doc = "< Maximum cubemaps layered texture dims"]
+    pub maxTextureCubemapLayered: [::std::os::raw::c_int; 2usize],
+    #[doc = "< Maximum 1D surface size"]
+    pub maxSurface1D: ::std::os::raw::c_int,
+    #[doc = "< Maximum 2D surface size"]
+    pub maxSurface2D: [::std::os::raw::c_int; 2usize],
+    #[doc = "< Maximum 3D surface size"]
+    pub maxSurface3D: [::std::os::raw::c_int; 3usize],
+    #[doc = "< Maximum 1D layered surface size"]
+    pub maxSurface1DLayered: [::std::os::raw::c_int; 2usize],
+    #[doc = "< Maximum 2D layared surface size"]
+    pub maxSurface2DLayered: [::std::os::raw::c_int; 3usize],
+    #[doc = "< Maximum cubemap surface size"]
+    pub maxSurfaceCubemap: ::std::os::raw::c_int,
+    #[doc = "< Maximum cubemap layered surface size"]
+    pub maxSurfaceCubemapLayered: [::std::os::raw::c_int; 2usize],
+    #[doc = "< Alignment requirement for surface"]
+    pub surfaceAlignment: usize,
     #[doc = "< Device can possibly execute multiple kernels concurrently."]
     pub concurrentKernels: ::std::os::raw::c_int,
-    #[doc = "< PCI Domain ID"]
-    pub pciDomainID: ::std::os::raw::c_int,
+    #[doc = "< Device has ECC support enabled"]
+    pub ECCEnabled: ::std::os::raw::c_int,
     #[doc = "< PCI Bus ID."]
     pub pciBusID: ::std::os::raw::c_int,
     #[doc = "< PCI Device ID."]
     pub pciDeviceID: ::std::os::raw::c_int,
-    #[doc = "< Maximum Shared Memory Per Multiprocessor."]
-    pub maxSharedMemoryPerMultiProcessor: usize,
+    #[doc = "< PCI Domain ID"]
+    pub pciDomainID: ::std::os::raw::c_int,
+    #[doc = "< 1:If device is Tesla device using TCC driver, else 0"]
+    pub tccDriver: ::std::os::raw::c_int,
+    #[doc = "< Number of async engines"]
+    pub asyncEngineCount: ::std::os::raw::c_int,
+    #[doc = "< Does device and host share unified address space"]
+    pub unifiedAddressing: ::std::os::raw::c_int,
+    #[doc = "< Max global memory clock frequency in khz."]
+    pub memoryClockRate: ::std::os::raw::c_int,
+    #[doc = "< Global memory bus width in bits."]
+    pub memoryBusWidth: ::std::os::raw::c_int,
+    #[doc = "< L2 cache size."]
+    pub l2CacheSize: ::std::os::raw::c_int,
+    #[doc = "< Device's max L2 persisting lines in bytes"]
+    pub persistingL2CacheMaxSize: ::std::os::raw::c_int,
+    #[doc = "< Maximum resident threads per multi-processor."]
+    pub maxThreadsPerMultiProcessor: ::std::os::raw::c_int,
+    #[doc = "< Device supports stream priority"]
+    pub streamPrioritiesSupported: ::std::os::raw::c_int,
+    #[doc = "< Indicates globals are cached in L1"]
+    pub globalL1CacheSupported: ::std::os::raw::c_int,
+    #[doc = "< Locals are cached in L1"]
+    pub localL1CacheSupported: ::std::os::raw::c_int,
+    #[doc = "< Amount of shared memory available per multiprocessor."]
+    pub sharedMemPerMultiprocessor: usize,
+    #[doc = "< registers available per multiprocessor"]
+    pub regsPerMultiprocessor: ::std::os::raw::c_int,
+    #[doc = "< Device supports allocating managed memory on this system"]
+    pub managedMemory: ::std::os::raw::c_int,
     #[doc = "< 1 if device is on a multi-GPU board, 0 if not."]
     pub isMultiGpuBoard: ::std::os::raw::c_int,
-    #[doc = "< Check whether HIP can map host memory"]
-    pub canMapHostMemory: ::std::os::raw::c_int,
-    #[doc = "< DEPRECATED: use gcnArchName instead"]
-    pub gcnArch: ::std::os::raw::c_int,
-    #[doc = "< AMD GCN Arch Name."]
-    pub gcnArchName: [::std::os::raw::c_char; 256usize],
-    #[doc = "< APU vs dGPU"]
-    pub integrated: ::std::os::raw::c_int,
+    #[doc = "< Unique identifier for a group of devices on same multiboard GPU"]
+    pub multiGpuBoardGroupID: ::std::os::raw::c_int,
+    #[doc = "< Link between host and device supports native atomics"]
+    pub hostNativeAtomicSupported: ::std::os::raw::c_int,
+    #[doc = "< Deprecated. CUDA only."]
+    pub singleToDoublePrecisionPerfRatio: ::std::os::raw::c_int,
+    #[doc = "< Device supports coherently accessing pageable memory\n< without calling hipHostRegister on it"]
+    pub pageableMemoryAccess: ::std::os::raw::c_int,
+    #[doc = "< Device can coherently access managed memory concurrently with\n< the CPU"]
+    pub concurrentManagedAccess: ::std::os::raw::c_int,
+    #[doc = "< Is compute preemption supported on the device"]
+    pub computePreemptionSupported: ::std::os::raw::c_int,
+    #[doc = "< Device can access host registered memory with same\n< address as the host"]
+    pub canUseHostPointerForRegisteredMem: ::std::os::raw::c_int,
     #[doc = "< HIP device supports cooperative launch"]
     pub cooperativeLaunch: ::std::os::raw::c_int,
-    #[doc = "< HIP device supports cooperative launch on multiple devices"]
+    #[doc = "< HIP device supports cooperative launch on multiple\n< devices"]
     pub cooperativeMultiDeviceLaunch: ::std::os::raw::c_int,
-    #[doc = "< Maximum size for 1D textures bound to linear memory"]
-    pub maxTexture1DLinear: ::std::os::raw::c_int,
-    #[doc = "< Maximum number of elements in 1D images"]
-    pub maxTexture1D: ::std::os::raw::c_int,
-    #[doc = "< Maximum dimensions (width, height) of 2D images, in image elements"]
-    pub maxTexture2D: [::std::os::raw::c_int; 2usize],
-    #[doc = "< Maximum dimensions (width, height, depth) of 3D images, in image elements"]
-    pub maxTexture3D: [::std::os::raw::c_int; 3usize],
+    #[doc = "< Per device max shared mem per block usable by special opt in"]
+    pub sharedMemPerBlockOptin: usize,
+    #[doc = "< Device accesses pageable memory via the host's\n< page tables"]
+    pub pageableMemoryAccessUsesHostPageTables: ::std::os::raw::c_int,
+    #[doc = "< Host can directly access managed memory on the device\n< without migration"]
+    pub directManagedMemAccessFromHost: ::std::os::raw::c_int,
+    #[doc = "< Max number of blocks on CU"]
+    pub maxBlocksPerMultiProcessor: ::std::os::raw::c_int,
+    #[doc = "< Max value of access policy window"]
+    pub accessPolicyMaxWindowSize: ::std::os::raw::c_int,
+    #[doc = "< Shared memory reserved by driver per block"]
+    pub reservedSharedMemPerBlock: usize,
+    #[doc = "< Device supports hipHostRegister"]
+    pub hostRegisterSupported: ::std::os::raw::c_int,
+    #[doc = "< Indicates if device supports sparse hip arrays"]
+    pub sparseHipArraySupported: ::std::os::raw::c_int,
+    #[doc = "< Device supports using the hipHostRegisterReadOnly flag\n< with hipHostRegister"]
+    pub hostRegisterReadOnlySupported: ::std::os::raw::c_int,
+    #[doc = "< Indicates external timeline semaphore support"]
+    pub timelineSemaphoreInteropSupported: ::std::os::raw::c_int,
+    #[doc = "< Indicates if device supports hipMallocAsync and hipMemPool APIs"]
+    pub memoryPoolsSupported: ::std::os::raw::c_int,
+    #[doc = "< Indicates device support of RDMA APIs"]
+    pub gpuDirectRDMASupported: ::std::os::raw::c_int,
+    #[doc = "< Bitmask to be interpreted according to\n< hipFlushGPUDirectRDMAWritesOptions"]
+    pub gpuDirectRDMAFlushWritesOptions: ::std::os::raw::c_uint,
+    #[doc = "< value of hipGPUDirectRDMAWritesOrdering"]
+    pub gpuDirectRDMAWritesOrdering: ::std::os::raw::c_int,
+    #[doc = "< Bitmask of handle types support with mempool based IPC"]
+    pub memoryPoolSupportedHandleTypes: ::std::os::raw::c_uint,
+    #[doc = "< Device supports deferred mapping HIP arrays and HIP\n< mipmapped arrays"]
+    pub deferredMappingHipArraySupported: ::std::os::raw::c_int,
+    #[doc = "< Device supports IPC events"]
+    pub ipcEventSupported: ::std::os::raw::c_int,
+    #[doc = "< Device supports cluster launch"]
+    pub clusterLaunch: ::std::os::raw::c_int,
+    #[doc = "< Indicates device supports unified function pointers"]
+    pub unifiedFunctionPointers: ::std::os::raw::c_int,
+    #[doc = "< CUDA Reserved."]
+    pub reserved: [::std::os::raw::c_int; 63usize],
+    #[doc = "< Reserved for adding new entries for HIP/CUDA."]
+    pub hipReserved: [::std::os::raw::c_int; 32usize],
+    #[doc = "< AMD GCN Arch Name. HIP Only."]
+    pub gcnArchName: [::std::os::raw::c_char; 256usize],
+    #[doc = "< Maximum Shared Memory Per CU. HIP Only."]
+    pub maxSharedMemoryPerMultiProcessor: usize,
+    #[doc = "< Frequency in khz of the timer used by the device-side \"clock*\"\n< instructions.  New for HIP."]
+    pub clockInstructionRate: ::std::os::raw::c_int,
+    #[doc = "< Architectural feature flags.  New for HIP."]
+    pub arch: hipDeviceArch_t,
     #[doc = "< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register"]
     pub hdpMemFlushCntl: *mut ::std::os::raw::c_uint,
     #[doc = "< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register"]
     pub hdpRegFlushCntl: *mut ::std::os::raw::c_uint,
-    #[doc = "<Maximum pitch in bytes allowed by memory copies"]
-    pub memPitch: usize,
-    #[doc = "<Alignment requirement for textures"]
-    pub textureAlignment: usize,
-    #[doc = "<Pitch alignment requirement for texture references bound to pitched memory"]
-    pub texturePitchAlignment: usize,
-    #[doc = "<Run time limit for kernels executed on the device"]
-    pub kernelExecTimeoutEnabled: ::std::os::raw::c_int,
-    #[doc = "<Device has ECC support enabled"]
-    pub ECCEnabled: ::std::os::raw::c_int,
-    #[doc = "< 1:If device is Tesla device using TCC driver, else 0"]
-    pub tccDriver: ::std::os::raw::c_int,
-    #[doc = "< HIP device supports cooperative launch on multiple"]
+    #[doc = "< HIP device supports cooperative launch on\n< multiple"]
     pub cooperativeMultiDeviceUnmatchedFunc: ::std::os::raw::c_int,
-    #[doc = "< HIP device supports cooperative launch on multiple"]
+    #[doc = "< HIP device supports cooperative launch on\n< multiple"]
     pub cooperativeMultiDeviceUnmatchedGridDim: ::std::os::raw::c_int,
-    #[doc = "< HIP device supports cooperative launch on multiple"]
+    #[doc = "< HIP device supports cooperative launch on\n< multiple"]
     pub cooperativeMultiDeviceUnmatchedBlockDim: ::std::os::raw::c_int,
-    #[doc = "< HIP device supports cooperative launch on multiple"]
+    #[doc = "< HIP device supports cooperative launch on\n< multiple"]
     pub cooperativeMultiDeviceUnmatchedSharedMem: ::std::os::raw::c_int,
     #[doc = "< 1: if it is a large PCI bar device, else 0"]
     pub isLargeBar: ::std::os::raw::c_int,
     #[doc = "< Revision of the GPU in this device"]
     pub asicRevision: ::std::os::raw::c_int,
-    #[doc = "< Device supports allocating managed memory on this system"]
-    pub managedMemory: ::std::os::raw::c_int,
-    #[doc = "< Host can directly access managed memory on the device without migration"]
-    pub directManagedMemAccessFromHost: ::std::os::raw::c_int,
-    #[doc = "< Device can coherently access managed memory concurrently with the CPU"]
-    pub concurrentManagedAccess: ::std::os::raw::c_int,
-    #[doc = "< Device supports coherently accessing pageable memory\n< without calling hipHostRegister on it"]
-    pub pageableMemoryAccess: ::std::os::raw::c_int,
-    #[doc = "< Device accesses pageable memory via the host's page tables"]
-    pub pageableMemoryAccessUsesHostPageTables: ::std::os::raw::c_int,
+}
+impl hipMemoryType {
+    #[doc = "< Unregistered memory"]
+    pub const hipMemoryTypeUnregistered: hipMemoryType = hipMemoryType(0);
 }
 impl hipMemoryType {
     #[doc = "< Memory is physically located on host"]
-    pub const hipMemoryTypeHost: hipMemoryType = hipMemoryType(0);
+    pub const hipMemoryTypeHost: hipMemoryType = hipMemoryType(1);
 }
 impl hipMemoryType {
     #[doc = "< Memory is physically located on device. (see deviceId for\n< specific device)"]
-    pub const hipMemoryTypeDevice: hipMemoryType = hipMemoryType(1);
+    pub const hipMemoryTypeDevice: hipMemoryType = hipMemoryType(2);
+}
+impl hipMemoryType {
+    #[doc = "< Managed memory, automatically managed by the unified\n< memory system\n< place holder for new values."]
+    pub const hipMemoryTypeManaged: hipMemoryType = hipMemoryType(3);
 }
 impl hipMemoryType {
     #[doc = "< Array memory, physically located on device. (see deviceId for\n< specific device)"]
-    pub const hipMemoryTypeArray: hipMemoryType = hipMemoryType(2);
+    pub const hipMemoryTypeArray: hipMemoryType = hipMemoryType(10);
 }
 impl hipMemoryType {
-    #[doc = "< Not used currently"]
-    pub const hipMemoryTypeUnified: hipMemoryType = hipMemoryType(3);
-}
-impl hipMemoryType {
-    #[doc = "< Managed memory, automaticallly managed by the unified\n< memory system"]
-    pub const hipMemoryTypeManaged: hipMemoryType = hipMemoryType(4);
+    #[doc = "< unified address space"]
+    pub const hipMemoryTypeUnified: hipMemoryType = hipMemoryType(11);
 }
 #[repr(transparent)]
-#[doc = " hipMemoryType (for pointer attributes)\n\n @note  hipMemoryType enum values are different from cudaMemoryType enum values.\n In this case, memory type translation for hipPointerGetAttributes needs to be handled properly\n on nvidia platform to get the correct memory type in CUDA. Developers should use '#ifdef' in order\n to assign the correct enum values depending on Nvidia or AMD platform.\n\n @note  cudaMemoryTypeUnregistered is currently not supported due to HIP functionality backward\n compatibility."]
+#[doc = " hipMemoryType (for pointer attributes)\n\n @note hipMemoryType enum values are combination of cudaMemoryType and cuMemoryType and AMD specific enum values.\n"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
 pub struct hipMemoryType(pub ::std::os::raw::c_int);
 #[doc = " Pointer attributes"]
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct hipPointerAttribute_t {
-    pub __bindgen_anon_1: hipPointerAttribute_t__bindgen_ty_1,
+    pub type_: hipMemoryType,
     pub device: ::std::os::raw::c_int,
     pub devicePointer: *mut ::std::os::raw::c_void,
     pub hostPointer: *mut ::std::os::raw::c_void,
     pub isManaged: ::std::os::raw::c_int,
     pub allocationFlags: ::std::os::raw::c_uint,
 }
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union hipPointerAttribute_t__bindgen_ty_1 {
-    pub memoryType: hipMemoryType,
-    pub type_: hipMemoryType,
-}
 impl hipError_t {
     #[doc = "< Successful completion."]
     pub const hipSuccess: hipError_t = hipError_t(0);
@@ -791,9 +893,11 @@ impl hipError_t {
     pub const hipErrorLaunchTimeOut: hipError_t = hipError_t(702);
 }
 impl hipError_t {
+    #[doc = "< Peer access was already enabled from the current\n< device."]
     pub const hipErrorPeerAccessAlreadyEnabled: hipError_t = hipError_t(704);
 }
 impl hipError_t {
+    #[doc = "< Peer access was never enabled from the current device."]
     pub const hipErrorPeerAccessNotEnabled: hipError_t = hipError_t(705);
 }
 impl hipError_t {
@@ -809,15 +913,19 @@ impl hipError_t {
     pub const hipErrorAssert: hipError_t = hipError_t(710);
 }
 impl hipError_t {
+    #[doc = "< Produced when trying to lock a page-locked\n< memory."]
     pub const hipErrorHostMemoryAlreadyRegistered: hipError_t = hipError_t(712);
 }
 impl hipError_t {
+    #[doc = "< Produced when trying to unlock a non-page-locked\n< memory."]
     pub const hipErrorHostMemoryNotRegistered: hipError_t = hipError_t(713);
 }
 impl hipError_t {
+    #[doc = "< An exception occurred on the device while executing a kernel."]
     pub const hipErrorLaunchFailure: hipError_t = hipError_t(719);
 }
 impl hipError_t {
+    #[doc = "< This error indicates that the number of blocks\n< launched per grid for a kernel that was launched\n< via cooperative launch APIs exceeds the maximum\n< number of allowed blocks for the current device."]
     pub const hipErrorCooperativeLaunchTooLarge: hipError_t = hipError_t(720);
 }
 impl hipError_t {
@@ -898,7 +1006,7 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(1);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Asynchronous engines number."]
+    #[doc = "< Asynchronous engines number."]
     pub const hipDeviceAttributeAsyncEngineCount: hipDeviceAttribute_t = hipDeviceAttribute_t(2);
 }
 impl hipDeviceAttribute_t {
@@ -906,7 +1014,7 @@ impl hipDeviceAttribute_t {
     pub const hipDeviceAttributeCanMapHostMemory: hipDeviceAttribute_t = hipDeviceAttribute_t(3);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Device can access host registered memory\n< at the same virtual address as the CPU"]
+    #[doc = "< Device can access host registered memory\n< at the same virtual address as the CPU"]
     pub const hipDeviceAttributeCanUseHostPointerForRegisteredMem: hipDeviceAttribute_t =
         hipDeviceAttribute_t(4);
 }
@@ -919,7 +1027,7 @@ impl hipDeviceAttribute_t {
     pub const hipDeviceAttributeComputeMode: hipDeviceAttribute_t = hipDeviceAttribute_t(6);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Device supports Compute Preemption."]
+    #[doc = "< Device supports Compute Preemption."]
     pub const hipDeviceAttributeComputePreemptionSupported: hipDeviceAttribute_t =
         hipDeviceAttribute_t(7);
 }
@@ -942,7 +1050,7 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(11);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Device can concurrently copy memory and execute a kernel.\n< Deprecated. Use instead asyncEngineCount."]
+    #[doc = "< Device can concurrently copy memory and execute a kernel.\n< Deprecated. Use instead asyncEngineCount."]
     pub const hipDeviceAttributeDeviceOverlap: hipDeviceAttribute_t = hipDeviceAttribute_t(12);
 }
 impl hipDeviceAttribute_t {
@@ -951,12 +1059,12 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(13);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Device supports caching globals in L1"]
+    #[doc = "< Device supports caching globals in L1"]
     pub const hipDeviceAttributeGlobalL1CacheSupported: hipDeviceAttribute_t =
         hipDeviceAttribute_t(14);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Link between the device and the host supports native atomic operations"]
+    #[doc = "< Link between the device and the host supports native atomic operations"]
     pub const hipDeviceAttributeHostNativeAtomicSupported: hipDeviceAttribute_t =
         hipDeviceAttribute_t(15);
 }
@@ -982,11 +1090,11 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(20);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms"]
+    #[doc = "< 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms"]
     pub const hipDeviceAttributeLuid: hipDeviceAttribute_t = hipDeviceAttribute_t(21);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms"]
+    #[doc = "< Luid device node mask. Undefined on TCC and non-Windows platforms"]
     pub const hipDeviceAttributeLuidDeviceNodeMask: hipDeviceAttribute_t = hipDeviceAttribute_t(22);
 }
 impl hipDeviceAttribute_t {
@@ -999,7 +1107,7 @@ impl hipDeviceAttribute_t {
     pub const hipDeviceAttributeManagedMemory: hipDeviceAttribute_t = hipDeviceAttribute_t(24);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Max block size per multiprocessor"]
+    #[doc = "< Max block size per multiprocessor"]
     pub const hipDeviceAttributeMaxBlocksPerMultiProcessor: hipDeviceAttribute_t =
         hipDeviceAttribute_t(25);
 }
@@ -1063,7 +1171,7 @@ impl hipDeviceAttribute_t {
     pub const hipDeviceAttributeMaxTexture1DWidth: hipDeviceAttribute_t = hipDeviceAttribute_t(39);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Maximum dimensions of 1D layered texture."]
+    #[doc = "< Maximum dimensions of 1D layered texture."]
     pub const hipDeviceAttributeMaxTexture1DLayered: hipDeviceAttribute_t =
         hipDeviceAttribute_t(40);
 }
@@ -1072,7 +1180,7 @@ impl hipDeviceAttribute_t {
     pub const hipDeviceAttributeMaxTexture1DLinear: hipDeviceAttribute_t = hipDeviceAttribute_t(41);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Maximum size of 1D mipmapped texture."]
+    #[doc = "< Maximum size of 1D mipmapped texture."]
     pub const hipDeviceAttributeMaxTexture1DMipmap: hipDeviceAttribute_t = hipDeviceAttribute_t(42);
 }
 impl hipDeviceAttribute_t {
@@ -1084,20 +1192,20 @@ impl hipDeviceAttribute_t {
     pub const hipDeviceAttributeMaxTexture2DHeight: hipDeviceAttribute_t = hipDeviceAttribute_t(44);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Maximum dimensions of 2D texture if gather operations  performed."]
+    #[doc = "< Maximum dimensions of 2D texture if gather operations  performed."]
     pub const hipDeviceAttributeMaxTexture2DGather: hipDeviceAttribute_t = hipDeviceAttribute_t(45);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Maximum dimensions of 2D layered texture."]
+    #[doc = "< Maximum dimensions of 2D layered texture."]
     pub const hipDeviceAttributeMaxTexture2DLayered: hipDeviceAttribute_t =
         hipDeviceAttribute_t(46);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory."]
+    #[doc = "< Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory."]
     pub const hipDeviceAttributeMaxTexture2DLinear: hipDeviceAttribute_t = hipDeviceAttribute_t(47);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Maximum dimensions of 2D mipmapped texture."]
+    #[doc = "< Maximum dimensions of 2D mipmapped texture."]
     pub const hipDeviceAttributeMaxTexture2DMipmap: hipDeviceAttribute_t = hipDeviceAttribute_t(48);
 }
 impl hipDeviceAttribute_t {
@@ -1113,15 +1221,15 @@ impl hipDeviceAttribute_t {
     pub const hipDeviceAttributeMaxTexture3DDepth: hipDeviceAttribute_t = hipDeviceAttribute_t(51);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Maximum dimensions of alternate 3D texture."]
+    #[doc = "< Maximum dimensions of alternate 3D texture."]
     pub const hipDeviceAttributeMaxTexture3DAlt: hipDeviceAttribute_t = hipDeviceAttribute_t(52);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Maximum dimensions of Cubemap texture"]
+    #[doc = "< Maximum dimensions of Cubemap texture"]
     pub const hipDeviceAttributeMaxTextureCubemap: hipDeviceAttribute_t = hipDeviceAttribute_t(53);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Maximum dimensions of Cubemap layered texture."]
+    #[doc = "< Maximum dimensions of Cubemap layered texture."]
     pub const hipDeviceAttributeMaxTextureCubemapLayered: hipDeviceAttribute_t =
         hipDeviceAttribute_t(54);
 }
@@ -1156,7 +1264,7 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(61);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Unique ID of device group on the same multi-GPU board"]
+    #[doc = "< Unique ID of device group on the same multi-GPU board"]
     pub const hipDeviceAttributeMultiGpuBoardGroupID: hipDeviceAttribute_t =
         hipDeviceAttribute_t(62);
 }
@@ -1166,8 +1274,8 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(63);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Device name."]
-    pub const hipDeviceAttributeName: hipDeviceAttribute_t = hipDeviceAttribute_t(64);
+    #[doc = "< Previously hipDeviceAttributeName"]
+    pub const hipDeviceAttributeUnused1: hipDeviceAttribute_t = hipDeviceAttribute_t(64);
 }
 impl hipDeviceAttribute_t {
     #[doc = "< Device supports coherently accessing pageable memory\n< without calling hipHostRegister on it"]
@@ -1192,7 +1300,7 @@ impl hipDeviceAttribute_t {
     pub const hipDeviceAttributePciDomainID: hipDeviceAttribute_t = hipDeviceAttribute_t(69);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda11 only. Maximum l2 persisting lines capacity in bytes"]
+    #[doc = "< Maximum l2 persisting lines capacity in bytes"]
     pub const hipDeviceAttributePersistingL2CacheMaxSize: hipDeviceAttribute_t =
         hipDeviceAttribute_t(70);
 }
@@ -1207,7 +1315,7 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(72);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda11 only. Shared memory reserved by CUDA driver per block."]
+    #[doc = "< Shared memory reserved by CUDA driver per block."]
     pub const hipDeviceAttributeReservedSharedMemPerBlock: hipDeviceAttribute_t =
         hipDeviceAttribute_t(73);
 }
@@ -1217,12 +1325,12 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(74);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Maximum shared memory per block usable by special opt in."]
+    #[doc = "< Maximum shared memory per block usable by special opt in."]
     pub const hipDeviceAttributeSharedMemPerBlockOptin: hipDeviceAttribute_t =
         hipDeviceAttribute_t(75);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Shared memory available per multiprocessor."]
+    #[doc = "< Shared memory available per multiprocessor."]
     pub const hipDeviceAttributeSharedMemPerMultiprocessor: hipDeviceAttribute_t =
         hipDeviceAttribute_t(76);
 }
@@ -1232,12 +1340,12 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(77);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Whether to support stream priorities."]
+    #[doc = "< Whether to support stream priorities."]
     pub const hipDeviceAttributeStreamPrioritiesSupported: hipDeviceAttribute_t =
         hipDeviceAttribute_t(78);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Alignment requirement for surfaces"]
+    #[doc = "< Alignment requirement for surfaces"]
     pub const hipDeviceAttributeSurfaceAlignment: hipDeviceAttribute_t = hipDeviceAttribute_t(79);
 }
 impl hipDeviceAttribute_t {
@@ -1267,8 +1375,8 @@ impl hipDeviceAttribute_t {
     pub const hipDeviceAttributeUnifiedAddressing: hipDeviceAttribute_t = hipDeviceAttribute_t(85);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Cuda only. Unique ID in 16 byte."]
-    pub const hipDeviceAttributeUuid: hipDeviceAttribute_t = hipDeviceAttribute_t(86);
+    #[doc = "< Previously hipDeviceAttributeUuid"]
+    pub const hipDeviceAttributeUnused2: hipDeviceAttribute_t = hipDeviceAttribute_t(86);
 }
 impl hipDeviceAttribute_t {
     #[doc = "< Warp size in threads."]
@@ -1284,6 +1392,16 @@ impl hipDeviceAttribute_t {
     pub const hipDeviceAttributeVirtualMemoryManagementSupported: hipDeviceAttribute_t =
         hipDeviceAttribute_t(89);
 }
+impl hipDeviceAttribute_t {
+    #[doc = "< Can device support host memory registration via hipHostRegister"]
+    pub const hipDeviceAttributeHostRegisterSupported: hipDeviceAttribute_t =
+        hipDeviceAttribute_t(90);
+}
+impl hipDeviceAttribute_t {
+    #[doc = "< Supported handle mask for HIP Stream Ordered Memory Allocator"]
+    pub const hipDeviceAttributeMemoryPoolSupportedHandleTypes: hipDeviceAttribute_t =
+        hipDeviceAttribute_t(91);
+}
 impl hipDeviceAttribute_t {
     pub const hipDeviceAttributeCudaCompatibleEnd: hipDeviceAttribute_t =
         hipDeviceAttribute_t(9999);
@@ -1298,8 +1416,8 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(10000);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Device architecture"]
-    pub const hipDeviceAttributeArch: hipDeviceAttribute_t = hipDeviceAttribute_t(10001);
+    #[doc = "< Previously hipDeviceAttributeArch"]
+    pub const hipDeviceAttributeUnused3: hipDeviceAttribute_t = hipDeviceAttribute_t(10001);
 }
 impl hipDeviceAttribute_t {
     #[doc = "< Maximum Shared Memory PerMultiprocessor."]
@@ -1307,12 +1425,12 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(10002);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Device gcn architecture"]
-    pub const hipDeviceAttributeGcnArch: hipDeviceAttribute_t = hipDeviceAttribute_t(10003);
+    #[doc = "< Previously hipDeviceAttributeGcnArch"]
+    pub const hipDeviceAttributeUnused4: hipDeviceAttribute_t = hipDeviceAttribute_t(10003);
 }
 impl hipDeviceAttribute_t {
-    #[doc = "< Device gcnArch name in 256 bytes"]
-    pub const hipDeviceAttributeGcnArchName: hipDeviceAttribute_t = hipDeviceAttribute_t(10004);
+    #[doc = "< Previously hipDeviceAttributeGcnArchName"]
+    pub const hipDeviceAttributeUnused5: hipDeviceAttribute_t = hipDeviceAttribute_t(10004);
 }
 impl hipDeviceAttribute_t {
     #[doc = "< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register"]
@@ -1381,7 +1499,7 @@ impl hipDeviceAttribute_t {
         hipDeviceAttribute_t(20000);
 }
 #[repr(transparent)]
-#[doc = " hipDeviceAttribute_t\n"]
+#[doc = " hipDeviceAttribute_t\n hipDeviceAttributeUnused number: 5"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
 pub struct hipDeviceAttribute_t(pub ::std::os::raw::c_int);
 impl hipComputeMode {
@@ -1399,6 +1517,32 @@ impl hipComputeMode {
 #[repr(transparent)]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
 pub struct hipComputeMode(pub ::std::os::raw::c_int);
+impl hipFlushGPUDirectRDMAWritesOptions {
+    pub const hipFlushGPUDirectRDMAWritesOptionHost: hipFlushGPUDirectRDMAWritesOptions =
+        hipFlushGPUDirectRDMAWritesOptions(1);
+}
+impl hipFlushGPUDirectRDMAWritesOptions {
+    pub const hipFlushGPUDirectRDMAWritesOptionMemOps: hipFlushGPUDirectRDMAWritesOptions =
+        hipFlushGPUDirectRDMAWritesOptions(2);
+}
+#[repr(transparent)]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct hipFlushGPUDirectRDMAWritesOptions(pub ::std::os::raw::c_int);
+impl hipGPUDirectRDMAWritesOrdering {
+    pub const hipGPUDirectRDMAWritesOrderingNone: hipGPUDirectRDMAWritesOrdering =
+        hipGPUDirectRDMAWritesOrdering(0);
+}
+impl hipGPUDirectRDMAWritesOrdering {
+    pub const hipGPUDirectRDMAWritesOrderingOwner: hipGPUDirectRDMAWritesOrdering =
+        hipGPUDirectRDMAWritesOrdering(100);
+}
+impl hipGPUDirectRDMAWritesOrdering {
+    pub const hipGPUDirectRDMAWritesOrderingAllDevices: hipGPUDirectRDMAWritesOrdering =
+        hipGPUDirectRDMAWritesOrdering(200);
+}
+#[repr(transparent)]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct hipGPUDirectRDMAWritesOrdering(pub ::std::os::raw::c_int);
 #[repr(transparent)]
 #[derive(Copy, Clone)]
 pub struct hipDeviceptr_t(pub *mut ::std::os::raw::c_void);
@@ -1426,6 +1570,23 @@ pub struct hipChannelFormatDesc {
     pub w: ::std::os::raw::c_int,
     pub f: hipChannelFormatKind,
 }
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct hipArray {
+    pub data: *mut ::std::os::raw::c_void,
+    pub desc: hipChannelFormatDesc,
+    pub type_: ::std::os::raw::c_uint,
+    pub width: ::std::os::raw::c_uint,
+    pub height: ::std::os::raw::c_uint,
+    pub depth: ::std::os::raw::c_uint,
+    pub Format: hipArray_Format,
+    pub NumChannels: ::std::os::raw::c_uint,
+    pub isDrv: bool,
+    pub textureType: ::std::os::raw::c_uint,
+    pub flags: ::std::os::raw::c_uint,
+}
+pub type hipArray_t = *mut hipArray;
+pub type hipArray_const_t = *const hipArray;
 impl hipArray_Format {
     pub const HIP_AD_FORMAT_UNSIGNED_INT8: hipArray_Format = hipArray_Format(1);
 }
@@ -1473,42 +1634,24 @@ pub struct HIP_ARRAY3D_DESCRIPTOR {
 }
 #[repr(C)]
 #[derive(Copy, Clone)]
-pub struct hipArray {
-    pub data: *mut ::std::os::raw::c_void,
-    pub desc: hipChannelFormatDesc,
-    pub type_: ::std::os::raw::c_uint,
-    pub width: ::std::os::raw::c_uint,
-    pub height: ::std::os::raw::c_uint,
-    pub depth: ::std::os::raw::c_uint,
-    pub Format: hipArray_Format,
-    pub NumChannels: ::std::os::raw::c_uint,
-    pub isDrv: bool,
-    pub textureType: ::std::os::raw::c_uint,
-    pub flags: ::std::os::raw::c_uint,
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
 pub struct hip_Memcpy2D {
     pub srcXInBytes: usize,
     pub srcY: usize,
     pub srcMemoryType: hipMemoryType,
     pub srcHost: *const ::std::os::raw::c_void,
     pub srcDevice: hipDeviceptr_t,
-    pub srcArray: *mut hipArray,
+    pub srcArray: hipArray_t,
     pub srcPitch: usize,
     pub dstXInBytes: usize,
     pub dstY: usize,
     pub dstMemoryType: hipMemoryType,
     pub dstHost: *mut ::std::os::raw::c_void,
     pub dstDevice: hipDeviceptr_t,
-    pub dstArray: *mut hipArray,
+    pub dstArray: hipArray_t,
     pub dstPitch: usize,
     pub WidthInBytes: usize,
     pub Height: usize,
 }
-pub type hipArray_t = *mut hipArray;
-pub type hiparray = hipArray_t;
-pub type hipArray_const_t = *const hipArray;
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct hipMipmappedArray {
@@ -2076,8 +2219,13 @@ impl hipMemcpyKind {
     pub const hipMemcpyDeviceToDevice: hipMemcpyKind = hipMemcpyKind(3);
 }
 impl hipMemcpyKind {
+    #[doc = "< Runtime will automatically determine\n<copy-kind based on virtual addresses."]
     pub const hipMemcpyDefault: hipMemcpyKind = hipMemcpyKind(4);
 }
+impl hipMemcpyKind {
+    #[doc = "< Device-to-Device Copy without using compute units"]
+    pub const hipMemcpyDeviceToDeviceNoCU: hipMemcpyKind = hipMemcpyKind(1024);
+}
 #[repr(transparent)]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
 pub struct hipMemcpyKind(pub ::std::os::raw::c_int);
@@ -2118,29 +2266,29 @@ pub struct hipMemcpy3DParms {
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct HIP_MEMCPY3D {
-    pub srcXInBytes: ::std::os::raw::c_uint,
-    pub srcY: ::std::os::raw::c_uint,
-    pub srcZ: ::std::os::raw::c_uint,
-    pub srcLOD: ::std::os::raw::c_uint,
+    pub srcXInBytes: usize,
+    pub srcY: usize,
+    pub srcZ: usize,
+    pub srcLOD: usize,
     pub srcMemoryType: hipMemoryType,
     pub srcHost: *const ::std::os::raw::c_void,
     pub srcDevice: hipDeviceptr_t,
     pub srcArray: hipArray_t,
-    pub srcPitch: ::std::os::raw::c_uint,
-    pub srcHeight: ::std::os::raw::c_uint,
-    pub dstXInBytes: ::std::os::raw::c_uint,
-    pub dstY: ::std::os::raw::c_uint,
-    pub dstZ: ::std::os::raw::c_uint,
-    pub dstLOD: ::std::os::raw::c_uint,
+    pub srcPitch: usize,
+    pub srcHeight: usize,
+    pub dstXInBytes: usize,
+    pub dstY: usize,
+    pub dstZ: usize,
+    pub dstLOD: usize,
     pub dstMemoryType: hipMemoryType,
     pub dstHost: *mut ::std::os::raw::c_void,
     pub dstDevice: hipDeviceptr_t,
     pub dstArray: hipArray_t,
-    pub dstPitch: ::std::os::raw::c_uint,
-    pub dstHeight: ::std::os::raw::c_uint,
-    pub WidthInBytes: ::std::os::raw::c_uint,
-    pub Height: ::std::os::raw::c_uint,
-    pub Depth: ::std::os::raw::c_uint,
+    pub dstPitch: usize,
+    pub dstHeight: usize,
+    pub WidthInBytes: usize,
+    pub Height: usize,
+    pub Depth: usize,
 }
 impl hipFunction_attribute {
     pub const HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: hipFunction_attribute =
@@ -2444,22 +2592,23 @@ pub struct ihipEvent_t {
 }
 pub type hipEvent_t = *mut ihipEvent_t;
 impl hipLimit_t {
-    #[doc = "< limit of stack size in bytes on the current device"]
+    #[doc = "< Limit of stack size in bytes on the current device, per\n< thread. The size is in units of 256 dwords, up to the\n< limit of (128K - 16)"]
     pub const hipLimitStackSize: hipLimit_t = hipLimit_t(0);
 }
 impl hipLimit_t {
-    #[doc = "< size limit in bytes of fifo used by printf call on the device"]
+    #[doc = "< Size limit in bytes of fifo used by printf call on the\n< device. Currently not supported"]
     pub const hipLimitPrintfFifoSize: hipLimit_t = hipLimit_t(1);
 }
 impl hipLimit_t {
-    #[doc = "< limit of heap size in bytes on the current device"]
+    #[doc = "< Limit of heap size in bytes on the current device, should\n< be less than the global memory size on the device"]
     pub const hipLimitMallocHeapSize: hipLimit_t = hipLimit_t(2);
 }
 impl hipLimit_t {
-    #[doc = "< supported limit range"]
+    #[doc = "< Supported limit range"]
     pub const hipLimitRange: hipLimit_t = hipLimit_t(3);
 }
 #[repr(transparent)]
+#[doc = " hipLimit\n\n @note In HIP device limit-related APIs, any input limit value other than those defined in the\n enum is treated as \"UnsupportedLimit\" by default."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
 pub struct hipLimit_t(pub ::std::os::raw::c_int);
 impl hipMemoryAdvise {
@@ -2872,6 +3021,10 @@ impl hipExternalMemoryHandleType_enum {
     pub const hipExternalMemoryHandleTypeD3D11ResourceKmt: hipExternalMemoryHandleType_enum =
         hipExternalMemoryHandleType_enum(7);
 }
+impl hipExternalMemoryHandleType_enum {
+    pub const hipExternalMemoryHandleTypeNvSciBuf: hipExternalMemoryHandleType_enum =
+        hipExternalMemoryHandleType_enum(8);
+}
 #[repr(transparent)]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
 pub struct hipExternalMemoryHandleType_enum(pub ::std::os::raw::c_int);
@@ -2883,12 +3036,14 @@ pub struct hipExternalMemoryHandleDesc_st {
     pub handle: hipExternalMemoryHandleDesc_st__bindgen_ty_1,
     pub size: ::std::os::raw::c_ulonglong,
     pub flags: ::std::os::raw::c_uint,
+    pub reserved: [::std::os::raw::c_uint; 16usize],
 }
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub union hipExternalMemoryHandleDesc_st__bindgen_ty_1 {
     pub fd: ::std::os::raw::c_int,
     pub win32: hipExternalMemoryHandleDesc_st__bindgen_ty_1__bindgen_ty_1,
+    pub nvSciBufObject: *const ::std::os::raw::c_void,
 }
 #[repr(C)]
 #[derive(Copy, Clone)]
@@ -2903,8 +3058,19 @@ pub struct hipExternalMemoryBufferDesc_st {
     pub offset: ::std::os::raw::c_ulonglong,
     pub size: ::std::os::raw::c_ulonglong,
     pub flags: ::std::os::raw::c_uint,
+    pub reserved: [::std::os::raw::c_uint; 16usize],
 }
 pub type hipExternalMemoryBufferDesc = hipExternalMemoryBufferDesc_st;
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct hipExternalMemoryMipmappedArrayDesc_st {
+    pub offset: ::std::os::raw::c_ulonglong,
+    pub formatDesc: hipChannelFormatDesc,
+    pub extent: hipExtent,
+    pub flags: ::std::os::raw::c_uint,
+    pub numLevels: ::std::os::raw::c_uint,
+}
+pub type hipExternalMemoryMipmappedArrayDesc = hipExternalMemoryMipmappedArrayDesc_st;
 pub type hipExternalMemory_t = *mut ::std::os::raw::c_void;
 impl hipExternalSemaphoreHandleType_enum {
     pub const hipExternalSemaphoreHandleTypeOpaqueFd: hipExternalSemaphoreHandleType_enum =
@@ -2922,6 +3088,30 @@ impl hipExternalSemaphoreHandleType_enum {
     pub const hipExternalSemaphoreHandleTypeD3D12Fence: hipExternalSemaphoreHandleType_enum =
         hipExternalSemaphoreHandleType_enum(4);
 }
+impl hipExternalSemaphoreHandleType_enum {
+    pub const hipExternalSemaphoreHandleTypeD3D11Fence: hipExternalSemaphoreHandleType_enum =
+        hipExternalSemaphoreHandleType_enum(5);
+}
+impl hipExternalSemaphoreHandleType_enum {
+    pub const hipExternalSemaphoreHandleTypeNvSciSync: hipExternalSemaphoreHandleType_enum =
+        hipExternalSemaphoreHandleType_enum(6);
+}
+impl hipExternalSemaphoreHandleType_enum {
+    pub const hipExternalSemaphoreHandleTypeKeyedMutex: hipExternalSemaphoreHandleType_enum =
+        hipExternalSemaphoreHandleType_enum(7);
+}
+impl hipExternalSemaphoreHandleType_enum {
+    pub const hipExternalSemaphoreHandleTypeKeyedMutexKmt: hipExternalSemaphoreHandleType_enum =
+        hipExternalSemaphoreHandleType_enum(8);
+}
+impl hipExternalSemaphoreHandleType_enum {
+    pub const hipExternalSemaphoreHandleTypeTimelineSemaphoreFd:
+        hipExternalSemaphoreHandleType_enum = hipExternalSemaphoreHandleType_enum(9);
+}
+impl hipExternalSemaphoreHandleType_enum {
+    pub const hipExternalSemaphoreHandleTypeTimelineSemaphoreWin32:
+        hipExternalSemaphoreHandleType_enum = hipExternalSemaphoreHandleType_enum(10);
+}
 #[repr(transparent)]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
 pub struct hipExternalSemaphoreHandleType_enum(pub ::std::os::raw::c_int);
@@ -2932,12 +3122,14 @@ pub struct hipExternalSemaphoreHandleDesc_st {
     pub type_: hipExternalSemaphoreHandleType,
     pub handle: hipExternalSemaphoreHandleDesc_st__bindgen_ty_1,
     pub flags: ::std::os::raw::c_uint,
+    pub reserved: [::std::os::raw::c_uint; 16usize],
 }
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub union hipExternalSemaphoreHandleDesc_st__bindgen_ty_1 {
     pub fd: ::std::os::raw::c_int,
     pub win32: hipExternalSemaphoreHandleDesc_st__bindgen_ty_1__bindgen_ty_1,
+    pub NvSciSyncObj: *const ::std::os::raw::c_void,
 }
 #[repr(C)]
 #[derive(Copy, Clone)]
@@ -2958,7 +3150,8 @@ pub struct hipExternalSemaphoreSignalParams_st {
 #[derive(Copy, Clone)]
 pub struct hipExternalSemaphoreSignalParams_st__bindgen_ty_1 {
     pub fence: hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_1,
-    pub keyedMutex: hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_2,
+    pub nvSciSync: hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_2,
+    pub keyedMutex: hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_3,
     pub reserved: [::std::os::raw::c_uint; 12usize],
 }
 #[repr(C)]
@@ -2968,7 +3161,13 @@ pub struct hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_1 {
 }
 #[repr(C)]
 #[derive(Copy, Clone)]
-pub struct hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_2 {
+pub union hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_2 {
+    pub fence: *mut ::std::os::raw::c_void,
+    pub reserved: ::std::os::raw::c_ulonglong,
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_3 {
     pub key: ::std::os::raw::c_ulonglong,
 }
 pub type hipExternalSemaphoreSignalParams = hipExternalSemaphoreSignalParams_st;
@@ -2984,7 +3183,8 @@ pub struct hipExternalSemaphoreWaitParams_st {
 #[derive(Copy, Clone)]
 pub struct hipExternalSemaphoreWaitParams_st__bindgen_ty_1 {
     pub fence: hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_1,
-    pub keyedMutex: hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_2,
+    pub nvSciSync: hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_2,
+    pub keyedMutex: hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_3,
     pub reserved: [::std::os::raw::c_uint; 10usize],
 }
 #[repr(C)]
@@ -2994,28 +3194,18 @@ pub struct hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_1 {
 }
 #[repr(C)]
 #[derive(Copy, Clone)]
-pub struct hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_2 {
+pub union hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_2 {
+    pub fence: *mut ::std::os::raw::c_void,
+    pub reserved: ::std::os::raw::c_ulonglong,
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_3 {
     pub key: ::std::os::raw::c_ulonglong,
     pub timeoutMs: ::std::os::raw::c_uint,
 }
 #[doc = " External semaphore wait parameters, compatible with driver type"]
 pub type hipExternalSemaphoreWaitParams = hipExternalSemaphoreWaitParams_st;
-impl hipGLDeviceList {
-    #[doc = "< All hip devices used by current OpenGL context."]
-    pub const hipGLDeviceListAll: hipGLDeviceList = hipGLDeviceList(1);
-}
-impl hipGLDeviceList {
-    #[doc = "< Hip devices used by current OpenGL context in current\n< frame"]
-    pub const hipGLDeviceListCurrentFrame: hipGLDeviceList = hipGLDeviceList(2);
-}
-impl hipGLDeviceList {
-    #[doc = "< Hip devices used by current OpenGL context in next\n< frame."]
-    pub const hipGLDeviceListNextFrame: hipGLDeviceList = hipGLDeviceList(3);
-}
-#[repr(transparent)]
-#[doc = " HIP Devices used by current OpenGL Context."]
-#[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct hipGLDeviceList(pub ::std::os::raw::c_int);
 impl hipGraphicsRegisterFlags {
     pub const hipGraphicsRegisterFlagsNone: hipGraphicsRegisterFlags = hipGraphicsRegisterFlags(0);
 }
@@ -3219,6 +3409,23 @@ pub union hipKernelNodeAttrValue {
     pub accessPolicyWindow: hipAccessPolicyWindow,
     pub cooperative: ::std::os::raw::c_int,
 }
+#[doc = " Memset node params"]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct HIP_MEMSET_NODE_PARAMS {
+    #[doc = "< Destination pointer on device"]
+    pub dst: hipDeviceptr_t,
+    #[doc = "< Destination device pointer pitch. Unused if height equals 1"]
+    pub pitch: usize,
+    #[doc = "< Value of memset to be set"]
+    pub value: ::std::os::raw::c_uint,
+    #[doc = "< Element in bytes. Must be 1, 2, or 4."]
+    pub elementSize: ::std::os::raw::c_uint,
+    #[doc = "< Width of a row"]
+    pub width: usize,
+    #[doc = "< Number of rows"]
+    pub height: usize,
+}
 impl hipGraphExecUpdateResult {
     #[doc = "< The update succeeded"]
     pub const hipGraphExecUpdateSuccess: hipGraphExecUpdateResult = hipGraphExecUpdateResult(0);
@@ -3561,9 +3768,73 @@ pub struct hipArrayMapInfo__bindgen_ty_2__bindgen_ty_2 {
 pub union hipArrayMapInfo__bindgen_ty_3 {
     pub memHandle: hipMemGenericAllocationHandle_t,
 }
+#[doc = " Memcpy node params"]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct hipMemcpyNodeParams {
+    #[doc = "< Must be zero."]
+    pub flags: ::std::os::raw::c_int,
+    #[doc = "< Must be zero."]
+    pub reserved: [::std::os::raw::c_int; 3usize],
+    #[doc = "< Params set for the memory copy."]
+    pub copyParams: hipMemcpy3DParms,
+}
+#[doc = " Child graph node params"]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct hipChildGraphNodeParams {
+    #[doc = "< Either the child graph to clone into the node, or\n< a handle to the graph possessed by the node used during query"]
+    pub graph: hipGraph_t,
+}
+#[doc = " Event wait node params"]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct hipEventWaitNodeParams {
+    #[doc = "< Event to wait on"]
+    pub event: hipEvent_t,
+}
+#[doc = " Event record node params"]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct hipEventRecordNodeParams {
+    #[doc = "< The event to be recorded when node executes"]
+    pub event: hipEvent_t,
+}
+#[doc = " Memory free node params"]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct hipMemFreeNodeParams {
+    #[doc = "< the pointer to be freed"]
+    pub dptr: *mut ::std::os::raw::c_void,
+}
+#[doc = " Params for different graph nodes"]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct hipGraphNodeParams {
+    pub type_: hipGraphNodeType,
+    pub reserved0: [::std::os::raw::c_int; 3usize],
+    pub __bindgen_anon_1: hipGraphNodeParams__bindgen_ty_1,
+    pub reserved2: ::std::os::raw::c_longlong,
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union hipGraphNodeParams__bindgen_ty_1 {
+    pub reserved1: [::std::os::raw::c_longlong; 29usize],
+    pub kernel: hipKernelNodeParams,
+    pub memcpy: hipMemcpyNodeParams,
+    pub memset: hipMemsetParams,
+    pub host: hipHostNodeParams,
+    pub graph: hipChildGraphNodeParams,
+    pub eventWait: hipEventWaitNodeParams,
+    pub eventRecord: hipEventRecordNodeParams,
+    pub extSemSignal: hipExternalSemaphoreSignalNodeParams,
+    pub extSemWait: hipExternalSemaphoreWaitNodeParams,
+    pub alloc: hipMemAllocNodeParams,
+    pub free: hipMemFreeNodeParams,
+}
 extern "C" {
     #[must_use]
-    #[doc = " @}\n/\n/**\n  @defgroup API HIP API\n  @{\n\n  Defines the HIP API.  See the individual sections for more information.\n/\n/**\n  @defgroup Driver Initialization and Version\n  @{\n  This section describes the initializtion and version functions of HIP runtime API.\n\n/\n/**\n @brief Explicitly initializes the HIP runtime.\n\n Most HIP APIs implicitly initialize the HIP runtime.\n This API provides control over the timing of the initialization."]
+    #[doc = " @}\n/\n/**\n  @defgroup API HIP API\n  @{\n\n  Defines the HIP API.  See the individual sections for more information.\n/\n/**\n  @defgroup Driver Initialization and Version\n  @{\n  This section describes the initialization and version functions of HIP runtime API.\n\n/\n/**\n @brief Explicitly initializes the HIP runtime.\n\n @param [in] flags  Initialization flag, should be zero.\n\n Most HIP APIs implicitly initialize the HIP runtime.\n This API provides control over the timing of the initialization.\n\n @returns #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipInit(flags: ::std::os::raw::c_uint) -> hipError_t;
 }
 extern "C" {
@@ -3601,7 +3872,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Returns an UUID for the device.[BETA]\n @param [out] uuid UUID for the device\n @param [in] device device ordinal\n\n @beta This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorNotInitialized,\n #hipErrorDeinitialized"]
+    #[doc = " @brief Returns an UUID for the device.[BETA]\n @param [out] uuid UUID for the device\n @param [in] device device ordinal\n\n @warning This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorNotInitialized,\n #hipErrorDeinitialized"]
     pub fn hipDeviceGetUuid(uuid: *mut hipUUID, device: hipDevice_t) -> hipError_t;
 }
 extern "C" {
@@ -3648,7 +3919,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Set default device to be used for subsequent hip API calls from this thread.\n\n @param[in] deviceId Valid device in range 0...hipGetDeviceCount().\n\n Sets @p device as the default device for the calling host thread.  Valid device id's are 0...\n (hipGetDeviceCount()-1).\n\n Many HIP APIs implicitly use the \"default device\" :\n\n - Any device memory subsequently allocated from this host thread (using hipMalloc) will be\n allocated on device.\n - Any streams or events created from this host thread will be associated with device.\n - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device\n (unless a specific stream is specified, in which case the device associated with that stream will\n be used).\n\n This function may be called from any host thread.  Multiple host threads may use the same device.\n This function does no synchronization with the previous or new device, and has very little\n runtime overhead. Applications can use hipSetDevice to quickly switch the default device before\n making a HIP runtime call which uses the default device.\n\n The default device is stored in thread-local-storage for each thread.\n Thread-pool implementations may inherit the default device of the previous thread.  A good\n practice is to always call hipSetDevice at the start of HIP coding sequency to establish a known\n standard device.\n\n @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorDeviceAlreadyInUse\n\n @see hipGetDevice, hipGetDeviceCount"]
+    #[doc = " @brief Set default device to be used for subsequent hip API calls from this thread.\n\n @param[in] deviceId Valid device in range 0...hipGetDeviceCount().\n\n Sets @p device as the default device for the calling host thread.  Valid device id's are 0...\n (hipGetDeviceCount()-1).\n\n Many HIP APIs implicitly use the \"default device\" :\n\n - Any device memory subsequently allocated from this host thread (using hipMalloc) will be\n allocated on device.\n - Any streams or events created from this host thread will be associated with device.\n - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device\n (unless a specific stream is specified, in which case the device associated with that stream will\n be used).\n\n This function may be called from any host thread.  Multiple host threads may use the same device.\n This function does no synchronization with the previous or new device, and has very little\n runtime overhead. Applications can use hipSetDevice to quickly switch the default device before\n making a HIP runtime call which uses the default device.\n\n The default device is stored in thread-local-storage for each thread.\n Thread-pool implementations may inherit the default device of the previous thread.  A good\n practice is to always call hipSetDevice at the start of HIP coding sequency to establish a known\n standard device.\n\n @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorNoDevice\n\n @see #hipGetDevice, #hipGetDeviceCount"]
     pub fn hipSetDevice(deviceId: ::std::os::raw::c_int) -> hipError_t;
 }
 extern "C" {
@@ -3695,8 +3966,8 @@ extern "C" {
 extern "C" {
     #[must_use]
     #[doc = " @brief Returns device properties.\n\n @param [out] prop written with device properties\n @param [in]  deviceId which device to query for information\n\n @return #hipSuccess, #hipErrorInvalidDevice\n @bug HCC always returns 0 for maxThreadsPerMultiProcessor\n @bug HCC always returns 0 for regsPerBlock\n @bug HCC always returns 0 for l2CacheSize\n\n Populates hipGetDeviceProperties with information for the specified device."]
-    pub fn hipGetDeviceProperties(
-        prop: *mut hipDeviceProp_t,
+    pub fn hipGetDevicePropertiesR0600(
+        prop: *mut hipDeviceProp_tR0600,
         deviceId: ::std::os::raw::c_int,
     ) -> hipError_t;
 }
@@ -3712,12 +3983,12 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets resource limits of current device\n The funtion querys the size of limit value, as required input enum hipLimit_t, can be either\n hipLimitStackSize, or hipLimitMallocHeapSize.\n\n @param [out] pValue returns the size of the limit in bytes\n @param [in]  limit the limit to query\n\n @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue\n"]
+    #[doc = " @brief Gets resource limits of current device\n\n The function queries the size of limit value, as required by the input enum value hipLimit_t,\n which can be either #hipLimitStackSize, or #hipLimitMallocHeapSize. Any other input as\n default, the function will return #hipErrorUnsupportedLimit.\n\n @param [out] pValue Returns the size of the limit in bytes\n @param [in]  limit The limit to query\n\n @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue\n"]
     pub fn hipDeviceGetLimit(pValue: *mut usize, limit: hipLimit_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Sets resource limits of current device\n As the input enum limit, hipLimitStackSize sets the limit value of the stack size on current\n GPU devie, hipLimitMallocHeapSize sets the limit value of the heap used by the malloc()/free()\n calls.\n\n @param [in] limit enum of hipLimit_t to set\n @param [in] value the size of limit value in bytes\n\n @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue\n"]
+    #[doc = " @brief Sets resource limits of current device.\n\n As the input enum limit,\n #hipLimitStackSize sets the limit value of the stack size on the current GPU device, per thread.\n The limit size can be obtained via hipDeviceGetLimit. The size is in units of 256 dwords, up to the limit\n (128K - 16).\n\n #hipLimitMallocHeapSize sets the limit value of the heap used by the malloc()/free()\n calls. For limit size, use the #hipDeviceGetLimit API.\n\n Any other input as default, the function will return hipErrorUnsupportedLimit.\n\n @param [in] limit Enum of hipLimit_t to set\n @param [in] value The size of limit value in bytes\n\n @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue\n"]
     pub fn hipDeviceSetLimit(limit: hipLimit_t, value: usize) -> hipError_t;
 }
 extern "C" {
@@ -3743,14 +4014,14 @@ extern "C" {
 extern "C" {
     #[must_use]
     #[doc = " @brief Device which matches hipDeviceProp_t is returned\n\n @param [out] device Pointer of the device\n @param [in]  prop Pointer of the properties\n\n @returns #hipSuccess, #hipErrorInvalidValue"]
-    pub fn hipChooseDevice(
+    pub fn hipChooseDeviceR0600(
         device: *mut ::std::os::raw::c_int,
-        prop: *const hipDeviceProp_t,
+        prop: *const hipDeviceProp_tR0600,
     ) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Returns the link type and hop count between two devices\n\n @param [in] device1 Ordinal for device1\n @param [in] device2 Ordinal for device2\n @param [out] linktype Returns the link type (See hsa_amd_link_info_type_t) between the two devices\n @param [out] hopcount Returns the hop count between the two devices\n\n Queries and returns the HSA link type and the hop count between the two specified devices.\n\n @returns #hipSuccess, #hipInvalidDevice, #hipErrorRuntimeOther"]
+    #[doc = " @brief Returns the link type and hop count between two devices\n\n @param [in] device1 Ordinal for device1\n @param [in] device2 Ordinal for device2\n @param [out] linktype Returns the link type (See hsa_amd_link_info_type_t) between the two devices\n @param [out] hopcount Returns the hop count between the two devices\n\n Queries and returns the HSA link type and the hop count between the two specified devices.\n\n @returns #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipExtGetLinkTypeAndHopCount(
         device1: ::std::os::raw::c_int,
         device2: ::std::os::raw::c_int,
@@ -3760,7 +4031,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets an interprocess memory handle for an existing device memory\n          allocation\n\n Takes a pointer to the base of an existing device memory allocation created\n with hipMalloc and exports it for use in another process. This is a\n lightweight operation and may be called multiple times on an allocation\n without adverse effects.\n\n If a region of memory is freed with hipFree and a subsequent call\n to hipMalloc returns memory with the same device address,\n hipIpcGetMemHandle will return a unique handle for the\n new memory.\n\n @param handle - Pointer to user allocated hipIpcMemHandle to return\n                    the handle in.\n @param devPtr - Base pointer to previously allocated device memory\n\n @returns\n #hipSuccess\n #hipErrorInvalidHandle\n #hipErrorOutOfMemory\n #hipErrorMapFailed\n\n @note This IPC memory related feature API on Windows may behave differently from Linux.\n"]
+    #[doc = " @brief Gets an interprocess memory handle for an existing device memory\n          allocation\n\n Takes a pointer to the base of an existing device memory allocation created\n with hipMalloc and exports it for use in another process. This is a\n lightweight operation and may be called multiple times on an allocation\n without adverse effects.\n\n If a region of memory is freed with hipFree and a subsequent call\n to hipMalloc returns memory with the same device address,\n hipIpcGetMemHandle will return a unique handle for the\n new memory.\n\n @param handle - Pointer to user allocated hipIpcMemHandle to return\n                    the handle in.\n @param devPtr - Base pointer to previously allocated device memory\n\n @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorOutOfMemory, #hipErrorMapFailed\n\n @note This IPC memory related feature API on Windows may behave differently from Linux.\n"]
     pub fn hipIpcGetMemHandle(
         handle: *mut hipIpcMemHandle_t,
         devPtr: *mut ::std::os::raw::c_void,
@@ -3768,7 +4039,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Opens an interprocess memory handle exported from another process\n          and returns a device pointer usable in the local process.\n\n Maps memory exported from another process with hipIpcGetMemHandle into\n the current device address space. For contexts on different devices\n hipIpcOpenMemHandle can attempt to enable peer access between the\n devices as if the user called hipDeviceEnablePeerAccess. This behavior is\n controlled by the hipIpcMemLazyEnablePeerAccess flag.\n hipDeviceCanAccessPeer can determine if a mapping is possible.\n\n Contexts that may open hipIpcMemHandles are restricted in the following way.\n hipIpcMemHandles from each device in a given process may only be opened\n by one context per device per other process.\n\n Memory returned from hipIpcOpenMemHandle must be freed with\n hipIpcCloseMemHandle.\n\n Calling hipFree on an exported memory region before calling\n hipIpcCloseMemHandle in the importing context will result in undefined\n behavior.\n\n @param devPtr - Returned device pointer\n @param handle - hipIpcMemHandle to open\n @param flags  - Flags for this operation. Must be specified as hipIpcMemLazyEnablePeerAccess\n\n @returns\n #hipSuccess,\n #hipErrorMapFailed,\n #hipErrorInvalidHandle,\n #hipErrorTooManyPeers\n\n @note During multiple processes, using the same memory handle opened by the current context,\n there is no guarantee that the same device poiter will be returned in @p *devPtr.\n This is diffrent from CUDA.\n @note This IPC memory related feature API on Windows may behave differently from Linux.\n"]
+    #[doc = " @brief Opens an interprocess memory handle exported from another process\n          and returns a device pointer usable in the local process.\n\n Maps memory exported from another process with hipIpcGetMemHandle into\n the current device address space. For contexts on different devices\n hipIpcOpenMemHandle can attempt to enable peer access between the\n devices as if the user called hipDeviceEnablePeerAccess. This behavior is\n controlled by the hipIpcMemLazyEnablePeerAccess flag.\n hipDeviceCanAccessPeer can determine if a mapping is possible.\n\n Contexts that may open hipIpcMemHandles are restricted in the following way.\n hipIpcMemHandles from each device in a given process may only be opened\n by one context per device per other process.\n\n Memory returned from hipIpcOpenMemHandle must be freed with\n hipIpcCloseMemHandle.\n\n Calling hipFree on an exported memory region before calling\n hipIpcCloseMemHandle in the importing context will result in undefined\n behavior.\n\n @param devPtr - Returned device pointer\n @param handle - hipIpcMemHandle to open\n @param flags  - Flags for this operation. Must be specified as hipIpcMemLazyEnablePeerAccess\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext,\n  #hipErrorInvalidDevicePointer\n\n @note During multiple processes, using the same memory handle opened by the current context,\n there is no guarantee that the same device pointer will be returned in @p *devPtr.\n This is different from CUDA.\n @note This IPC memory related feature API on Windows may behave differently from Linux.\n"]
     pub fn hipIpcOpenMemHandle(
         devPtr: *mut *mut ::std::os::raw::c_void,
         handle: hipIpcMemHandle_t,
@@ -3777,7 +4048,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Close memory mapped with hipIpcOpenMemHandle\n\n Unmaps memory returnd by hipIpcOpenMemHandle. The original allocation\n in the exporting process as well as imported mappings in other processes\n will be unaffected.\n\n Any resources used to enable peer access will be freed if this is the\n last mapping using them.\n\n @param devPtr - Device pointer returned by hipIpcOpenMemHandle\n\n @returns\n #hipSuccess,\n #hipErrorMapFailed,\n #hipErrorInvalidHandle\n\n @note This IPC memory related feature API on Windows may behave differently from Linux.\n"]
+    #[doc = " @brief Close memory mapped with hipIpcOpenMemHandle\n\n Unmaps memory returned by hipIpcOpenMemHandle. The original allocation\n in the exporting process as well as imported mappings in other processes\n will be unaffected.\n\n Any resources used to enable peer access will be freed if this is the\n last mapping using them.\n\n @param devPtr - Device pointer returned by hipIpcOpenMemHandle\n\n @returns #hipSuccess, #hipErrorMapFailed, #hipErrorInvalidHandle\n\n @note This IPC memory related feature API on Windows may behave differently from Linux.\n"]
     pub fn hipIpcCloseMemHandle(devPtr: *mut ::std::os::raw::c_void) -> hipError_t;
 }
 extern "C" {
@@ -3821,6 +4092,11 @@ extern "C" {
     #[doc = " @}\n/\n/**\n-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup Error Error Handling\n  @{\n  This section describes the error handling functions of HIP runtime API.\n/\n/**\n @brief Return last error returned by any HIP runtime API call and resets the stored error code to\n #hipSuccess\n\n @returns return code from last HIP called from the active host thread\n\n Returns the last error that has been returned by any of the runtime calls in the same host\n thread, and then resets the saved error to #hipSuccess.\n\n @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t"]
     pub fn hipGetLastError() -> hipError_t;
 }
+extern "C" {
+    #[must_use]
+    #[doc = " @brief Return last error returned by any HIP runtime API call and resets the stored error code to\n #hipSuccess\n\n @returns return code from last HIP called from the active host thread\n\n Returns the last error that has been returned by any of the runtime calls in the same host\n thread, and then resets the saved error to #hipSuccess.\n\n @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t"]
+    pub fn hipExtGetLastError() -> hipError_t;
+}
 extern "C" {
     #[must_use]
     #[doc = " @brief Return last error returned by any HIP runtime API call.\n\n @return #hipSuccess\n\n Returns the last error that has been returned by any of the runtime calls in the same host\n thread. Unlike hipGetLastError, this function does not reset the saved error code.\n\n @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t"]
@@ -3874,7 +4150,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Returns numerical values that correspond to the least and greatest stream priority.\n\n @param[in, out] leastPriority pointer in which value corresponding to least priority is returned.\n @param[in, out] greatestPriority pointer in which value corresponding to greatest priority is returned.\n\n Returns in *leastPriority and *greatestPriority the numerical values that correspond to the least\n and greatest stream priority respectively. Stream priorities follow a convention where lower numbers\n imply greater priorities. The range of meaningful stream priorities is given by\n [*greatestPriority, *leastPriority]. If the user attempts to create a stream with a priority value\n that is outside the the meaningful range as specified by this API, the priority is automatically\n clamped to within the valid range."]
+    #[doc = " @brief Returns numerical values that correspond to the least and greatest stream priority.\n\n @param[in, out] leastPriority pointer in which value corresponding to least priority is returned.\n @param[in, out] greatestPriority pointer in which value corresponding to greatest priority is returned.\n @returns #hipSuccess\n\n Returns in *leastPriority and *greatestPriority the numerical values that correspond to the least\n and greatest stream priority respectively. Stream priorities follow a convention where lower numbers\n imply greater priorities. The range of meaningful stream priorities is given by\n [*greatestPriority, *leastPriority]. If the user attempts to create a stream with a priority value\n that is outside the the meaningful range as specified by this API, the priority is automatically\n clamped to within the valid range."]
     pub fn hipDeviceGetStreamPriorityRange(
         leastPriority: *mut ::std::os::raw::c_int,
         greatestPriority: *mut ::std::os::raw::c_int,
@@ -3882,17 +4158,17 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Destroys the specified stream.\n\n @param[in] stream stream identifier.\n @return #hipSuccess #hipErrorInvalidHandle\n\n Destroys the specified stream.\n\n If commands are still executing on the specified stream, some may complete execution before the\n queue is deleted.\n\n The queue may be destroyed while some commands are still inflight, or may wait for all commands\n queued to the stream before destroying it.\n\n @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamQuery, hipStreamWaitEvent,\n hipStreamSynchronize"]
+    #[doc = " @brief Destroys the specified stream.\n\n @param[in] stream stream identifier.\n @return #hipSuccess #hipErrorInvalidHandle\n\n Destroys the specified stream.\n\n If commands are still executing on the specified stream, some may complete execution before the\n queue is deleted.\n\n The queue may be destroyed while some commands are still inflight, or may wait for all commands\n queued to the stream before destroying it.\n\n @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamQuery,\n hipStreamWaitEvent, hipStreamSynchronize"]
     pub fn hipStreamDestroy(stream: hipStream_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or\n #hipErrorNotReady if not.\n\n @param[in] stream stream to query\n\n @return #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle\n\n This is thread-safe and returns a snapshot of the current state of the queue.  However, if other\n host threads are sending work to the stream, the status may change immediately after the function\n is called.  It is typically used for debug.\n\n @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamSynchronize,\n hipStreamDestroy"]
+    #[doc = " @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or\n #hipErrorNotReady if not.\n\n @param[in] stream stream to query\n\n @return #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle\n\n This is thread-safe and returns a snapshot of the current state of the queue.  However, if other\n host threads are sending work to the stream, the status may change immediately after the function\n is called.  It is typically used for debug.\n\n @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent,\n hipStreamSynchronize, hipStreamDestroy"]
     pub fn hipStreamQuery(stream: hipStream_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Wait for all commands in stream to complete.\n\n @param[in] stream stream identifier.\n\n @return #hipSuccess, #hipErrorInvalidHandle\n\n This command is host-synchronous : the host will block until the specified stream is empty.\n\n This command follows standard null-stream semantics.  Specifically, specifying the null stream\n will cause the command to wait for other streams on the same device to complete all pending\n operations.\n\n This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active\n or blocking.\n\n @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamDestroy\n"]
+    #[doc = " @brief Wait for all commands in stream to complete.\n\n @param[in] stream stream identifier.\n\n @return #hipSuccess, #hipErrorInvalidHandle\n\n This command is host-synchronous : the host will block until the specified stream is empty.\n\n This command follows standard null-stream semantics.  Specifically, specifying the null stream\n will cause the command to wait for other streams on the same device to complete all pending\n operations.\n\n This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active\n or blocking.\n\n @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent,\n hipStreamDestroy\n"]
     pub fn hipStreamSynchronize(stream: hipStream_t) -> hipError_t;
 }
 extern "C" {
@@ -3961,7 +4237,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @}\n/\n/**\n-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup StreamM Stream Memory Operations\n  @{\n  This section describes Stream Memory Wait and Write functions of HIP runtime API.\n/\n/**\n @brief Enqueues a wait command to the stream.[BETA]\n\n @param [in] stream - Stream identifier\n @param [in] ptr    - Pointer to memory object allocated using 'hipMallocSignalMemory' flag\n @param [in] value  - Value to be used in compare operation\n @param [in] flags  - Defines the compare operation, supported values are hipStreamWaitValueGte\n hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor\n @param [in] mask   - Mask to be applied on value at memory before it is compared with value,\n default value is set to enable every bit\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n Enqueues a wait command to the stream, all operations enqueued  on this stream after this, will\n not execute until the defined wait condition is true.\n\n hipStreamWaitValueGte: waits until *ptr&mask >= value\n hipStreamWaitValueEq : waits until *ptr&mask == value\n hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0\n hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0\n\n @note when using 'hipStreamWaitValueNor', mask is applied on both 'value' and '*ptr'.\n\n @note Support for hipStreamWaitValue32 can be queried using 'hipDeviceGetAttribute()' and\n 'hipDeviceAttributeCanUseStreamWaitValue' flag.\n\n @beta This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue64, hipStreamWriteValue64,\n hipStreamWriteValue32, hipDeviceGetAttribute"]
+    #[doc = " @}\n/\n/**\n-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup StreamM Stream Memory Operations\n  @{\n  This section describes Stream Memory Wait and Write functions of HIP runtime API.\n/\n/**\n @brief Enqueues a wait command to the stream.[BETA]\n\n @param [in] stream - Stream identifier\n @param [in] ptr    - Pointer to memory object allocated using 'hipMallocSignalMemory' flag\n @param [in] value  - Value to be used in compare operation\n @param [in] flags  - Defines the compare operation, supported values are hipStreamWaitValueGte\n hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor\n @param [in] mask   - Mask to be applied on value at memory before it is compared with value,\n default value is set to enable every bit\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n Enqueues a wait command to the stream, all operations enqueued  on this stream after this, will\n not execute until the defined wait condition is true.\n\n hipStreamWaitValueGte: waits until *ptr&mask >= value\n hipStreamWaitValueEq : waits until *ptr&mask == value\n hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0\n hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0\n\n @note when using 'hipStreamWaitValueNor', mask is applied on both 'value' and '*ptr'.\n\n @note Support for hipStreamWaitValue32 can be queried using 'hipDeviceGetAttribute()' and\n 'hipDeviceAttributeCanUseStreamWaitValue' flag.\n\n @warning This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue64, hipStreamWriteValue64,\n hipStreamWriteValue32, hipDeviceGetAttribute"]
     pub fn hipStreamWaitValue32(
         stream: hipStream_t,
         ptr: *mut ::std::os::raw::c_void,
@@ -3972,7 +4248,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Enqueues a wait command to the stream.[BETA]\n\n @param [in] stream - Stream identifier\n @param [in] ptr    - Pointer to memory object allocated using 'hipMallocSignalMemory' flag\n @param [in] value  - Value to be used in compare operation\n @param [in] flags  - Defines the compare operation, supported values are hipStreamWaitValueGte\n hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor.\n @param [in] mask   - Mask to be applied on value at memory before it is compared with value\n default value is set to enable every bit\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n Enqueues a wait command to the stream, all operations enqueued  on this stream after this, will\n not execute until the defined wait condition is true.\n\n hipStreamWaitValueGte: waits until *ptr&mask >= value\n hipStreamWaitValueEq : waits until *ptr&mask == value\n hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0\n hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0\n\n @note when using 'hipStreamWaitValueNor', mask is applied on both 'value' and '*ptr'.\n\n @note Support for hipStreamWaitValue64 can be queried using 'hipDeviceGetAttribute()' and\n 'hipDeviceAttributeCanUseStreamWaitValue' flag.\n\n @beta This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue32, hipStreamWriteValue64,\n hipStreamWriteValue32, hipDeviceGetAttribute"]
+    #[doc = " @brief Enqueues a wait command to the stream.[BETA]\n\n @param [in] stream - Stream identifier\n @param [in] ptr    - Pointer to memory object allocated using 'hipMallocSignalMemory' flag\n @param [in] value  - Value to be used in compare operation\n @param [in] flags  - Defines the compare operation, supported values are hipStreamWaitValueGte\n hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor.\n @param [in] mask   - Mask to be applied on value at memory before it is compared with value\n default value is set to enable every bit\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n Enqueues a wait command to the stream, all operations enqueued  on this stream after this, will\n not execute until the defined wait condition is true.\n\n hipStreamWaitValueGte: waits until *ptr&mask >= value\n hipStreamWaitValueEq : waits until *ptr&mask == value\n hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0\n hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0\n\n @note when using 'hipStreamWaitValueNor', mask is applied on both 'value' and '*ptr'.\n\n @note Support for hipStreamWaitValue64 can be queried using 'hipDeviceGetAttribute()' and\n 'hipDeviceAttributeCanUseStreamWaitValue' flag.\n\n @warning This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue32, hipStreamWriteValue64,\n hipStreamWriteValue32, hipDeviceGetAttribute"]
     pub fn hipStreamWaitValue64(
         stream: hipStream_t,
         ptr: *mut ::std::os::raw::c_void,
@@ -3983,7 +4259,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Enqueues a write command to the stream.[BETA]\n\n @param [in] stream - Stream identifier\n @param [in] ptr    - Pointer to a GPU accessible memory object\n @param [in] value  - Value to be written\n @param [in] flags  - reserved, ignored for now, will be used in future releases\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n Enqueues a write command to the stream, write operation is performed after all earlier commands\n on this stream have completed the execution.\n\n @beta This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32,\n hipStreamWaitValue64"]
+    #[doc = " @brief Enqueues a write command to the stream.[BETA]\n\n @param [in] stream - Stream identifier\n @param [in] ptr    - Pointer to a GPU accessible memory object\n @param [in] value  - Value to be written\n @param [in] flags  - reserved, ignored for now, will be used in future releases\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n Enqueues a write command to the stream, write operation is performed after all earlier commands\n on this stream have completed the execution.\n\n @warning This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32,\n hipStreamWaitValue64"]
     pub fn hipStreamWriteValue32(
         stream: hipStream_t,
         ptr: *mut ::std::os::raw::c_void,
@@ -3993,7 +4269,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Enqueues a write command to the stream.[BETA]\n\n @param [in] stream - Stream identifier\n @param [in] ptr    - Pointer to a GPU accessible memory object\n @param [in] value  - Value to be written\n @param [in] flags  - reserved, ignored for now, will be used in future releases\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n Enqueues a write command to the stream, write operation is performed after all earlier commands\n on this stream have completed the execution.\n\n @beta This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32,\n hipStreamWaitValue64"]
+    #[doc = " @brief Enqueues a write command to the stream.[BETA]\n\n @param [in] stream - Stream identifier\n @param [in] ptr    - Pointer to a GPU accessible memory object\n @param [in] value  - Value to be written\n @param [in] flags  - reserved, ignored for now, will be used in future releases\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n Enqueues a write command to the stream, write operation is performed after all earlier commands\n on this stream have completed the execution.\n\n @warning This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32,\n hipStreamWaitValue64"]
     pub fn hipStreamWriteValue64(
         stream: hipStream_t,
         ptr: *mut ::std::os::raw::c_void,
@@ -4025,7 +4301,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Wait for an event to complete.\n\n  This function will block until the event is ready, waiting for all previous work in the stream\n specified when event was recorded with hipEventRecord().\n\n  If hipEventRecord() has not been called on @p event, this function returns immediately.\n\n  TODO-hip- This function needs to support hipEventBlockingSync parameter.\n\n  @param[in] event Event on which to wait.\n  @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized,\n #hipErrorInvalidHandle, #hipErrorLaunchFailure\n\n  @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord,\n hipEventElapsedTime"]
+    #[doc = "  @brief Wait for an event to complete.\n\n  This function will block until the event is ready, waiting for all previous work in the stream\n specified when event was recorded with hipEventRecord().\n\n  If hipEventRecord() has not been called on @p event, this function returns #hipSuccess when no\n  event is captured.\n\n  This function needs to support hipEventBlockingSync parameter.\n\n  @param[in] event Event on which to wait.\n\n  @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized,\n #hipErrorInvalidHandle, #hipErrorLaunchFailure\n\n  @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord,\n hipEventElapsedTime"]
     pub fn hipEventSynchronize(event: hipEvent_t) -> hipError_t;
 }
 extern "C" {
@@ -4035,12 +4311,12 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Query event status\n\n @param[in] event Event to query.\n @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle, #hipErrorInvalidValue,\n #hipErrorNotInitialized, #hipErrorLaunchFailure\n\n Query the status of the specified event.  This function will return #hipSuccess if all\n commands in the appropriate stream (specified to hipEventRecord()) have completed.  If that work\n has not completed, or if hipEventRecord() was not called on the event, then #hipErrorNotReady is\n returned.\n\n @see hipEventCreate, hipEventCreateWithFlags, hipEventRecord, hipEventDestroy,\n hipEventSynchronize, hipEventElapsedTime"]
+    #[doc = " @brief Query event status\n\n @param[in] event Event to query.\n @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle, #hipErrorInvalidValue,\n #hipErrorNotInitialized, #hipErrorLaunchFailure\n\n Query the status of the specified event.  This function will return #hipSuccess if all\n commands in the appropriate stream (specified to hipEventRecord()) have completed.  If any execution\n has not completed, then #hipErrorNotReady is returned.\n\n @note: This API returns #hipSuccess, if hipEventRecord() is not called before this API.\n\n @see hipEventCreate, hipEventCreateWithFlags, hipEventRecord, hipEventDestroy,\n hipEventSynchronize, hipEventElapsedTime"]
     pub fn hipEventQuery(event: hipEvent_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Sets information on the specified pointer.[BETA]\n\n  @param [in]      value     sets pointer attribute value\n  @param [in]      atribute attribute to set\n  @param [in]      ptr      pointer to set attributes for\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @beta This API is marked as beta, meaning, while this is feature complete,\n  it is still open to changes and may have outstanding issues.\n"]
+    #[doc = "  @brief Sets information on the specified pointer.[BETA]\n\n  @param [in]      value     Sets pointer attribute value\n  @param [in]      attribute  Attribute to set\n  @param [in]      ptr      Pointer to set attributes for\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @warning This API is marked as beta, meaning, while this is feature complete,\n  it is still open to changes and may have outstanding issues.\n"]
     pub fn hipPointerSetAttribute(
         value: *const ::std::os::raw::c_void,
         attribute: hipPointer_attribute,
@@ -4049,7 +4325,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Return attributes for the specified pointer\n\n  @param [out]  attributes  attributes for the specified pointer\n  @param [in]   ptr         pointer to get attributes for\n\n  @note  To get pointer's memory type, the parameter attributes has 'type' as member variable.\n  The 'type' indicates input pointer is allocated on device or host. That means the input\n  pointer must be returned or passed through an HIP API such as hipHostMalloc, hipMallocManaged,\n  hipHostRegister, etc. Otherwise, the pointer couldn't be handled by this API and attributes\n  returned hipErrorInvalidValue, due to the hipMemoryType enums values, unrecognized memory type\n  is currently not supported due to HIP functionality backward compatibility.\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @see hipPointerGetAttribute\n"]
+    #[doc = "  @brief Returns attributes for the specified pointer\n\n  @param [out]  attributes  attributes for the specified pointer\n  @param [in]   ptr         pointer to get attributes for\n\n  The output parameter 'attributes' has a member named 'type' that describes what memory the\n  pointer is associated with, such as device memory, host memory, managed memory, and others.\n  Otherwise, the API cannot handle the pointer and returns #hipErrorInvalidValue.\n\n  @note  The unrecognized memory type is unsupported to keep the HIP functionality backward\n  compatibility due to #hipMemoryType enum values.\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @note  The current behavior of this HIP API corresponds to the CUDA API before version 11.0.\n\n  @see hipPointerGetAttribute"]
     pub fn hipPointerGetAttributes(
         attributes: *mut hipPointerAttribute_t,
         ptr: *const ::std::os::raw::c_void,
@@ -4057,7 +4333,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Returns information about the specified pointer.[BETA]\n\n  @param [in, out] data     returned pointer attribute value\n  @param [in]      atribute attribute to query for\n  @param [in]      ptr      pointer to get attributes for\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @beta This API is marked as beta, meaning, while this is feature complete,\n  it is still open to changes and may have outstanding issues.\n\n  @see hipPointerGetAttributes"]
+    #[doc = "  @brief Returns information about the specified pointer.[BETA]\n\n  @param [in, out] data     Returned pointer attribute value\n  @param [in]      attribute  Attribute to query for\n  @param [in]      ptr      Pointer to get attributes for\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @warning This API is marked as beta, meaning, while this is feature complete,\n  it is still open to changes and may have outstanding issues.\n\n  @see hipPointerGetAttributes"]
     pub fn hipPointerGetAttribute(
         data: *mut ::std::os::raw::c_void,
         attribute: hipPointer_attribute,
@@ -4066,7 +4342,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Returns information about the specified pointer.[BETA]\n\n  @param [in]  numAttributes   number of attributes to query for\n  @param [in]  attributes      attributes to query for\n  @param [in, out] data        a two-dimensional containing pointers to memory locations\n                               where the result of each attribute query will be written to\n  @param [in]  ptr             pointer to get attributes for\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @beta This API is marked as beta, meaning, while this is feature complete,\n  it is still open to changes and may have outstanding issues.\n\n  @see hipPointerGetAttribute"]
+    #[doc = "  @brief Returns information about the specified pointer.[BETA]\n\n  @param [in]  numAttributes   number of attributes to query for\n  @param [in]  attributes      attributes to query for\n  @param [in, out] data        a two-dimensional array containing pointers to memory locations\n                               where the result of each attribute query will be written to\n  @param [in]  ptr             pointer to get attributes for\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @warning This API is marked as beta, meaning, while this is feature complete,\n  it is still open to changes and may have outstanding issues.\n\n  @see hipPointerGetAttribute"]
     pub fn hipDrvPointerGetAttributes(
         numAttributes: ::std::os::raw::c_uint,
         attributes: *mut hipPointer_attribute,
@@ -4084,7 +4360,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Signals a set of external semaphore objects.\n\n  @param[in] extSem_out  External semaphores to be waited on\n  @param[in] paramsArray Array of semaphore parameters\n  @param[in] numExtSems Number of semaphores to wait on\n  @param[in] stream Stream to enqueue the wait operations in\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @see"]
+    #[doc = "  @brief Signals a set of external semaphore objects.\n\n  @param[in] extSemArray  External semaphores to be signaled\n  @param[in] paramsArray Array of semaphore parameters\n  @param[in] numExtSems Number of semaphores to signal\n  @param[in] stream Stream to enqueue the signal operations in\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @see"]
     pub fn hipSignalExternalSemaphoresAsync(
         extSemArray: *const hipExternalSemaphore_t,
         paramsArray: *const hipExternalSemaphoreSignalParams,
@@ -4094,7 +4370,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Waits on a set of external semaphore objects\n\n  @param[in] extSem_out  External semaphores to be waited on\n  @param[in] paramsArray Array of semaphore parameters\n  @param[in] numExtSems Number of semaphores to wait on\n  @param[in] stream Stream to enqueue the wait operations in\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @see"]
+    #[doc = "  @brief Waits on a set of external semaphore objects\n\n  @param[in] extSemArray  External semaphores to be waited on\n  @param[in] paramsArray Array of semaphore parameters\n  @param[in] numExtSems Number of semaphores to wait on\n  @param[in] stream Stream to enqueue the wait operations in\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @see"]
     pub fn hipWaitExternalSemaphoresAsync(
         extSemArray: *const hipExternalSemaphore_t,
         paramsArray: *const hipExternalSemaphoreWaitParams,
@@ -4126,17 +4402,26 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Destroys an external memory object.\n\n  @param[in] extMem  External memory object to be destroyed\n\n  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @see"]
+    #[doc = "  @brief Destroys an external memory object.\n\n  @param[in] extMem  External memory object to be destroyed\n\n  @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue\n\n  @see"]
     pub fn hipDestroyExternalMemory(extMem: hipExternalMemory_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @}"]
+    #[doc = "  @brief Maps a mipmapped array onto an external memory object.\n\n  @param[out] mipmap mipmapped array to return\n  @param[in]  extMem external memory object handle\n  @param[in]  mipmapDesc external mipmapped array descriptor\n\n  Returned mipmapped array must be freed using hipFreeMipmappedArray.\n\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle\n\n  @see hipImportExternalMemory, hipDestroyExternalMemory, hipExternalMemoryGetMappedBuffer, hipFreeMipmappedArray"]
+    pub fn hipExternalMemoryGetMappedMipmappedArray(
+        mipmap: *mut hipMipmappedArray_t,
+        extMem: hipExternalMemory_t,
+        mipmapDesc: *const hipExternalMemoryMipmappedArrayDesc,
+    ) -> hipError_t;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @}\n/\n/**\n  @brief Allocate memory on the default accelerator\n\n  @param[out] ptr Pointer to the allocated memory\n  @param[in]  size Requested memory size\n\n  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.\n\n  @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr)\n\n  @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray,\n hipHostFree, hipHostMalloc"]
     pub fn hipMalloc(ptr: *mut *mut ::std::os::raw::c_void, size: usize) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Allocate memory on the default accelerator\n\n  @param[out] ptr Pointer to the allocated memory\n  @param[in]  size Requested memory size\n  @param[in]  flags Type of memory allocation\n\n  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.\n\n  @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr)\n\n  @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray,\n hipHostFree, hipHostMalloc"]
+    #[doc = "  @brief Allocate memory on the default accelerator\n\n  @param[out] ptr  Pointer to the allocated memory\n  @param[in]  sizeBytes  Requested memory size\n  @param[in]  flags  Type of memory allocation\n\n  If requested memory size is 0, no memory is allocated, *ptr returns nullptr, and #hipSuccess\n  is returned.\n\n  The memory allocation flag should be either #hipDeviceMallocDefault,\n  #hipDeviceMallocFinegrained, #hipDeviceMallocUncached, or #hipMallocSignalMemory.\n  If the flag is any other value, the API returns #hipErrorInvalidValue.\n\n  @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr)\n\n  @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray,\n hipHostFree, hipHostMalloc"]
     pub fn hipExtMallocWithFlags(
         ptr: *mut *mut ::std::os::raw::c_void,
         sizeBytes: usize,
@@ -4145,7 +4430,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Allocate pinned host memory [Deprecated]\n\n  @param[out] ptr Pointer to the allocated host pinned memory\n  @param[in]  size Requested memory size\n\n  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.\n\n  @return #hipSuccess, #hipErrorOutOfMemory\n\n  @warning  This API is deprecated use hipHostMalloc() instead"]
+    #[doc = "  @brief Allocate pinned host memory [Deprecated]\n\n  @param[out] ptr Pointer to the allocated host pinned memory\n  @param[in]  size Requested memory size\n\n  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.\n\n  @return #hipSuccess, #hipErrorOutOfMemory\n\n  @warning  This API is deprecated, use hipHostMalloc() instead"]
     pub fn hipMallocHost(ptr: *mut *mut ::std::os::raw::c_void, size: usize) -> hipError_t;
 }
 extern "C" {
@@ -4183,7 +4468,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Advise about the usage of a given memory range to HIP.\n\n @param [in] dev_ptr  pointer to memory to set the advice for\n @param [in] count    size in bytes of the memory range, it should be CPU page size alligned.\n @param [in] advice   advice to be applied for the specified memory range\n @param [in] device   device to apply the advice for\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n This HIP API advises about the usage to be applied on unified memory allocation in the\n range starting from the pointer address devPtr, with the size of count bytes. The memory range\n must refer to managed memory allocated via the API hipMallocManaged, and the range will be\n handled with proper round down and round up respectively in the driver to be aligned to\n CPU page size.\n\n @note  This API is implemented on Linux, under development on Windows."]
+    #[doc = " @brief Advise about the usage of a given memory range to HIP.\n\n @param [in] dev_ptr  pointer to memory to set the advice for\n @param [in] count    size in bytes of the memory range, it should be CPU page size aligned.\n @param [in] advice   advice to be applied for the specified memory range\n @param [in] device   device to apply the advice for\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n This HIP API advises about the usage to be applied on unified memory allocation in the\n range starting from the pointer address dev_ptr, with the size of count bytes.\n The memory range must refer to managed memory allocated via the API hipMallocManaged, and the\n range will be handled with proper round down and round up respectively in the driver to\n be aligned to CPU page size, the same way as corresponding CUDA API behaves in CUDA version 8.0\n and afterwards.\n\n @note  This API is implemented on Linux and is under development on Windows."]
     pub fn hipMemAdvise(
         dev_ptr: *const ::std::os::raw::c_void,
         count: usize,
@@ -4350,7 +4635,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Get Device pointer from Host Pointer allocated through hipHostMalloc\n\n  @param[out] dstPtr Device Pointer mapped to passed host pointer\n  @param[in]  hstPtr Host Pointer allocated through hipHostMalloc\n  @param[in]  flags Flags to be passed for extension\n\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory\n\n  @see hipSetDeviceFlags, hipHostMalloc"]
+    #[doc = "  @brief Get Device pointer from Host Pointer allocated through hipHostMalloc\n\n  @param[out] devPtr Device Pointer mapped to passed host pointer\n  @param[in]  hstPtr Host Pointer allocated through hipHostMalloc\n  @param[in]  flags Flags to be passed for extension\n\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory\n\n  @see hipSetDeviceFlags, hipHostMalloc"]
     pub fn hipHostGetDevicePointer(
         devPtr: *mut *mut ::std::os::raw::c_void,
         hstPtr: *mut ::std::os::raw::c_void,
@@ -4367,7 +4652,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Register host memory so it can be accessed from the current device.\n\n  @param[out] hostPtr Pointer to host memory to be registered.\n  @param[in] sizeBytes size of the host memory\n  @param[in] flags.  See below.\n\n  Flags:\n  - #hipHostRegisterDefault   Memory is Mapped and Portable\n  - #hipHostRegisterPortable  Memory is considered registered by all contexts.  HIP only supports\n one context so this is always assumed true.\n  - #hipHostRegisterMapped    Map the allocation into the address space for the current device.\n The device pointer can be obtained with #hipHostGetDevicePointer.\n\n\n  After registering the memory, use #hipHostGetDevicePointer to obtain the mapped device pointer.\n  On many systems, the mapped device pointer will have a different value than the mapped host\n pointer.  Applications must use the device pointer in device code, and the host pointer in device\n code.\n\n  On some systems, registered memory is pinned.  On some systems, registered memory may not be\n actually be pinned but uses OS or hardware facilities to all GPU access to the host memory.\n\n  Developers are strongly encouraged to register memory blocks which are aligned to the host\n cache-line size. (typically 64-bytes but can be obtains from the CPUID instruction).\n\n  If registering non-aligned pointers, the application must take care when register pointers from\n the same cache line on different devices.  HIP's coarse-grained synchronization model does not\n guarantee correct results if different devices write to different parts of the same cache block -\n typically one of the writes will \"win\" and overwrite data from the other registered memory\n region.\n\n  @return #hipSuccess, #hipErrorOutOfMemory\n\n  @see hipHostUnregister, hipHostGetFlags, hipHostGetDevicePointer"]
+    #[doc = "  @brief Register host memory so it can be accessed from the current device.\n\n  @param[in] hostPtr Pointer to host memory to be registered.\n  @param[in] sizeBytes Size of the host memory\n  @param[in] flags  See below.\n\n  Flags:\n  - #hipHostRegisterDefault   Memory is Mapped and Portable\n  - #hipHostRegisterPortable  Memory is considered registered by all contexts.  HIP only supports\n one context so this is always assumed true.\n  - #hipHostRegisterMapped    Map the allocation into the address space for the current device.\n The device pointer can be obtained with #hipHostGetDevicePointer.\n\n\n  After registering the memory, use #hipHostGetDevicePointer to obtain the mapped device pointer.\n  On many systems, the mapped device pointer will have a different value than the mapped host\n pointer.  Applications must use the device pointer in device code, and the host pointer in host\n code.\n\n  On some systems, registered memory is pinned.  On some systems, registered memory may not\n actually be pinned but uses OS or hardware facilities to allow GPU access to the host memory.\n\n  Developers are strongly encouraged to register memory blocks which are aligned to the host\n cache-line size. (typically 64-bytes but can be obtained from the CPUID instruction).\n\n  If registering non-aligned pointers, the application must take care when registering pointers from\n the same cache line on different devices.  HIP's coarse-grained synchronization model does not\n guarantee correct results if different devices write to different parts of the same cache block -\n typically one of the writes will \"win\" and overwrite data from the other registered memory\n region.\n\n  @return #hipSuccess, #hipErrorOutOfMemory\n\n  @see hipHostUnregister, hipHostGetFlags, hipHostGetDevicePointer"]
     pub fn hipHostRegister(
         hostPtr: *mut ::std::os::raw::c_void,
         sizeBytes: usize,
@@ -4391,7 +4676,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  Allocates at least width (in bytes) * height bytes of linear memory\n  Padding may occur to ensure alighnment requirements are met for the given row\n  The change in width size due to padding will be returned in *pitch.\n  Currently the alignment is set to 128 bytes\n\n  @param[out] dptr Pointer to the allocated device memory\n  @param[out] pitch Pitch for allocation (in bytes)\n  @param[in]  width Requested pitched allocation width (in bytes)\n  @param[in]  height Requested pitched allocation height\n\n  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.\n  The intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array.\n  Given the row and column of an array element of type T, the address is computed as:\n  T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;\n\n  @return Error code\n\n  @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D,\n hipMalloc3DArray, hipHostMalloc"]
+    #[doc = "  Allocates at least width (in bytes) * height bytes of linear memory\n  Padding may occur to ensure alignment requirements are met for the given row\n  The change in width size due to padding will be returned in *pitch.\n  Currently the alignment is set to 128 bytes\n\n  @param[out] dptr  Pointer to the allocated device memory\n  @param[out] pitch  Pitch for allocation (in bytes)\n  @param[in]  widthInBytes  Requested pitched allocation width (in bytes)\n  @param[in]  height  Requested pitched allocation height\n  @param[in]  elementSizeBytes  The size of element bytes, should be 4, 8 or 16\n\n  If size is 0, no memory is allocated, *dptr returns nullptr, and hipSuccess is returned.\n  The intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array.\n  Given the row and column of an array element of type T, the address is computed as:\n  T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;\n\n  @return Error code\n\n  @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D,\n hipMalloc3DArray, hipHostMalloc"]
     pub fn hipMemAllocPitch(
         dptr: *mut hipDeviceptr_t,
         pitch: *mut usize,
@@ -4407,7 +4692,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Free memory allocated by the hcc hip host memory allocation API [Deprecated]\n\n  @param[in] ptr Pointer to memory to be freed\n  @return #hipSuccess, #hipErrorInvalidValue (if pointer is invalid, including device pointers\n  allocated with hipMalloc)\n\n  @warning  This API is deprecated, use hipHostFree() instead"]
+    #[doc = "  @brief Free memory allocated by the hcc hip host memory allocation API [Deprecated]\n\n  @param[in] ptr Pointer to memory to be freed\n  @return #hipSuccess,\n          #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated\n  with hipMalloc)\n\n  @warning  This API is deprecated, use hipHostFree() instead"]
     pub fn hipFreeHost(ptr: *mut ::std::os::raw::c_void) -> hipError_t;
 }
 extern "C" {
@@ -4417,7 +4702,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copy data from src to dst.\n\n  It supports memory from host to device,\n  device to host, device to device and host to host\n  The src and dst must not overlap.\n\n  For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice).\n  For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the\n  device where the src data is physically located. For optimal peer-to-peer copies, the copy device\n  must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with copy\n  agent as the current device and src/dest as the peerDevice argument.  if this is not done, the\n  hipMemcpy will still work, but will perform the copy using a staging buffer on the host.\n  Calling hipMemcpy with dst and src pointers that do not match the hipMemcpyKind results in\n  undefined behavior.\n\n  @param[out]  dst Data being copy to\n  @param[in]  src Data being copy from\n  @param[in]  sizeBytes Data size in bytes\n  @param[in]  copyType Memory copy type\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown\n\n  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,\n hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,\n hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,\n hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,\n hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,\n hipMemHostAlloc, hipMemHostGetDevicePointer"]
+    #[doc = "  @brief Copy data from src to dst.\n\n  It supports memory from host to device,\n  device to host, device to device and host to host\n  The src and dst must not overlap.\n\n  For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice).\n  For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the\n  device where the src data is physically located. For optimal peer-to-peer copies, the copy device\n  must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with copy\n  agent as the current device and src/dest as the peerDevice argument.  if this is not done, the\n  hipMemcpy will still work, but will perform the copy using a staging buffer on the host.\n  Calling hipMemcpy with dst and src pointers that do not match the hipMemcpyKind results in\n  undefined behavior.\n\n  @param[out]  dst Data being copy to\n  @param[in]  src Data being copy from\n  @param[in]  sizeBytes Data size in bytes\n  @param[in]  kind Kind of transfer\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown\n\n  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,\n hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,\n hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,\n hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,\n hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,\n hipMemHostAlloc, hipMemHostGetDevicePointer"]
     pub fn hipMemcpy(
         dst: *mut ::std::os::raw::c_void,
         src: *const ::std::os::raw::c_void,
@@ -4427,7 +4712,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Memory copy on the stream.\n  It allows single or multiple devices to do memory copy on single or multiple streams.\n\n  @param[out]  dst Data being copy to\n  @param[in]  src Data being copy from\n  @param[in]  sizeBytes Data size in bytes\n  @param[in]  copyType Memory copy type\n  @param[in]  stream Valid stream\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown, #hipErrorContextIsDestroyed\n\n  @see hipMemcpy, hipStreamCreate, hipStreamSynchronize, hipStreamDestroy, hipSetDevice, hipLaunchKernelGGL\n"]
+    #[doc = "  @brief Memory copy on the stream.\n  It allows single or multiple devices to do memory copy on single or multiple streams.\n\n  @param[out]  dst Data being copy to\n  @param[in]  src Data being copy from\n  @param[in]  sizeBytes Data size in bytes\n  @param[in]  kind Kind of transfer\n  @param[in]  stream Valid stream\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorContextIsDestroyed\n\n  @see hipMemcpy, hipStreamCreate, hipStreamSynchronize, hipStreamDestroy, hipSetDevice, hipLaunchKernelGGL\n"]
     pub fn hipMemcpyWithStream(
         dst: *mut ::std::os::raw::c_void,
         src: *const ::std::os::raw::c_void,
@@ -4461,7 +4746,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copy data from Host to Device asynchronously\n\n  @param[out]  dst Data being copy to\n  @param[in]   src Data being copy from\n  @param[in]   sizeBytes Data size in bytes\n\n  @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidValue\n\n  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,\n hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,\n hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,\n hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,\n hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,\n hipMemHostAlloc, hipMemHostGetDevicePointer"]
+    #[doc = "  @brief Copy data from Host to Device asynchronously\n\n  @param[out]  dst  Data being copy to\n  @param[in]   src  Data being copy from\n  @param[in]   sizeBytes  Data size in bytes\n  @param[in]   stream  Stream identifier\n\n  @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidValue\n\n  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,\n hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,\n hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,\n hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,\n hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,\n hipMemHostAlloc, hipMemHostGetDevicePointer"]
     pub fn hipMemcpyHtoDAsync(
         dst: hipDeviceptr_t,
         src: *mut ::std::os::raw::c_void,
@@ -4471,7 +4756,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copy data from Device to Host asynchronously\n\n  @param[out]  dst Data being copy to\n  @param[in]   src Data being copy from\n  @param[in]   sizeBytes Data size in bytes\n\n  @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidValue\n\n  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,\n hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,\n hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,\n hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,\n hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,\n hipMemHostAlloc, hipMemHostGetDevicePointer"]
+    #[doc = "  @brief Copy data from Device to Host asynchronously\n\n  @param[out]  dst Data being copy to\n  @param[in]   src Data being copy from\n  @param[in]   sizeBytes Data size in bytes\n  @param[in]   stream  Stream identifier\n\n  @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidValue\n\n  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,\n hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,\n hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,\n hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,\n hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,\n hipMemHostAlloc, hipMemHostGetDevicePointer"]
     pub fn hipMemcpyDtoHAsync(
         dst: *mut ::std::os::raw::c_void,
         src: hipDeviceptr_t,
@@ -4481,7 +4766,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copy data from Device to Device asynchronously\n\n  @param[out]  dst Data being copy to\n  @param[in]   src Data being copy from\n  @param[in]   sizeBytes Data size in bytes\n\n  @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidValue\n\n  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,\n hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,\n hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,\n hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,\n hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,\n hipMemHostAlloc, hipMemHostGetDevicePointer"]
+    #[doc = "  @brief Copy data from Device to Device asynchronously\n\n  @param[out]  dst  Data being copy to\n  @param[in]   src  Data being copy from\n  @param[in]   sizeBytes  Data size in bytes\n  @param[in]   stream  Stream identifier\n\n  @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidValue\n\n  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,\n hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,\n hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,\n hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,\n hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,\n hipMemHostAlloc, hipMemHostGetDevicePointer"]
     pub fn hipMemcpyDtoDAsync(
         dst: hipDeviceptr_t,
         src: hipDeviceptr_t,
@@ -4514,7 +4799,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copies data to the given symbol on the device.\n Symbol HIP APIs allow a kernel to define a device-side data symbol which can be accessed on\n the host side. The symbol can be in __constant or device space.\n Note that the symbol name needs to be encased in the HIP_SYMBOL macro.\n This also applies to hipMemcpyFromSymbol, hipGetSymbolAddress, and hipGetSymbolSize.\n For detail usage, see the example at\n https://github.com/ROCm-Developer-Tools/HIP/blob/rocm-5.0.x/docs/markdown/hip_porting_guide.md\n\n  @param[out]  symbol  pointer to the device symbole\n  @param[in]   src  pointer to the source address\n  @param[in]   sizeBytes  size in bytes to copy\n  @param[in]   offset  offset in bytes from start of symbole\n  @param[in]   kind  type of memory transfer\n\n  @return #hipSuccess, #hipErrorInvalidValue\n"]
+    #[doc = "  @brief Copies data to the given symbol on the device.\n Symbol HIP APIs allow a kernel to define a device-side data symbol which can be accessed on\n the host side. The symbol can be in __constant or device space.\n Note that the symbol name needs to be encased in the HIP_SYMBOL macro.\n This also applies to hipMemcpyFromSymbol, hipGetSymbolAddress, and hipGetSymbolSize.\n For detailed usage, see the example at\n https://github.com/ROCm/HIP/blob/develop/docs/user_guide/hip_porting_guide.md\n\n  @param[out]  symbol  pointer to the device symbol\n  @param[in]   src  pointer to the source address\n  @param[in]   sizeBytes  size in bytes to copy\n  @param[in]   offset  offset in bytes from start of symbol\n  @param[in]   kind  type of memory transfer\n\n  @return #hipSuccess, #hipErrorInvalidValue\n"]
     pub fn hipMemcpyToSymbol(
         symbol: *const ::std::os::raw::c_void,
         src: *const ::std::os::raw::c_void,
@@ -4537,7 +4822,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copies data from the given symbol on the device.\n\n  @param[out]  dptr  Returns pointer to destinition memory address\n  @param[in]   symbol  pointer to the symbole address on the device\n  @param[in]   sizeBytes  size in bytes to copy\n  @param[in]   offset  offset in bytes from the start of symbole\n  @param[in]   kind  type of memory transfer\n\n  @return #hipSuccess, #hipErrorInvalidValue\n"]
+    #[doc = "  @brief Copies data from the given symbol on the device.\n\n  @param[out]  dst  Returns pointer to destination memory address\n  @param[in]   symbol  Pointer to the symbol address on the device\n  @param[in]   sizeBytes  Size in bytes to copy\n  @param[in]   offset  Offset in bytes from the start of symbol\n  @param[in]   kind  Type of memory transfer\n\n  @return #hipSuccess, #hipErrorInvalidValue\n"]
     pub fn hipMemcpyFromSymbol(
         dst: *mut ::std::os::raw::c_void,
         symbol: *const ::std::os::raw::c_void,
@@ -4548,7 +4833,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copies data from the given symbol on the device asynchronously.\n\n  @param[out]  dptr  Returns pointer to destinition memory address\n  @param[in]   symbol  pointer to the symbole address on the device\n  @param[in]   sizeBytes  size in bytes to copy\n  @param[in]   offset  offset in bytes from the start of symbole\n  @param[in]   kind  type of memory transfer\n  @param[in]   stream  stream identifier\n\n  @return #hipSuccess, #hipErrorInvalidValue\n"]
+    #[doc = "  @brief Copies data from the given symbol on the device asynchronously.\n\n  @param[out]  dst  Returns pointer to destination memory address\n  @param[in]   symbol  pointer to the symbol address on the device\n  @param[in]   sizeBytes  size in bytes to copy\n  @param[in]   offset  offset in bytes from the start of symbol\n  @param[in]   kind  type of memory transfer\n  @param[in]   stream  stream identifier\n\n  @return #hipSuccess, #hipErrorInvalidValue\n"]
     pub fn hipMemcpyFromSymbolAsync(
         dst: *mut ::std::os::raw::c_void,
         symbol: *const ::std::os::raw::c_void,
@@ -4560,7 +4845,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copy data from src to dst asynchronously.\n\n  @warning If host or dest are not pinned, the memory copy will be performed synchronously.  For\n best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously.\n\n  @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies.\n  For hipMemcpy, the copy is always performed by the device associated with the specified stream.\n\n  For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is a\n attached to the device where the src data is physically located. For optimal peer-to-peer copies,\n the copy device must be able to access the src and dst pointers (by calling\n hipDeviceEnablePeerAccess with copy agent as the current device and src/dest as the peerDevice\n argument.  if this is not done, the hipMemcpy will still work, but will perform the copy using a\n staging buffer on the host.\n\n  @param[out] dst Data being copy to\n  @param[in]  src Data being copy from\n  @param[in]  sizeBytes Data size in bytes\n  @param[in]  accelerator_view Accelerator view which the copy is being enqueued\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown\n\n  @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,\n hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol,\n hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,\n hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,\n hipMemcpyFromSymbolAsync"]
+    #[doc = "  @brief Copy data from src to dst asynchronously.\n\n  @warning If host or dest are not pinned, the memory copy will be performed synchronously.  For\n best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously.\n\n  @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies.\n  For hipMemcpy, the copy is always performed by the device associated with the specified stream.\n\n  For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is a\n attached to the device where the src data is physically located. For optimal peer-to-peer copies,\n the copy device must be able to access the src and dst pointers (by calling\n hipDeviceEnablePeerAccess with copy agent as the current device and src/dest as the peerDevice\n argument.  if this is not done, the hipMemcpy will still work, but will perform the copy using a\n staging buffer on the host.\n\n  @param[out] dst Data being copy to\n  @param[in]  src Data being copy from\n  @param[in]  sizeBytes Data size in bytes\n  @param[in]  kind  Type of memory transfer\n  @param[in]  stream  Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown\n\n  @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,\n hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol,\n hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,\n hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,\n hipMemcpyFromSymbolAsync"]
     pub fn hipMemcpyAsync(
         dst: *mut ::std::os::raw::c_void,
         src: *const ::std::os::raw::c_void,
@@ -4571,7 +4856,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant\n byte value value.\n\n  @param[out] dst Data being filled\n  @param[in]  constant value to be set\n  @param[in]  sizeBytes Data size in bytes\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
+    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant\n byte value value.\n\n  @param[out] dst  Data being filled\n  @param[in]  value  Value to be set\n  @param[in]  sizeBytes  Data size in bytes\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
     pub fn hipMemset(
         dst: *mut ::std::os::raw::c_void,
         value: ::std::os::raw::c_int,
@@ -4580,7 +4865,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant\n byte value value.\n\n  @param[out] dst Data ptr to be filled\n  @param[in]  constant value to be set\n  @param[in]  number of values to be set\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
+    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant\n byte value value.\n\n  @param[out] dest  Data ptr to be filled\n  @param[in]  value  Value to be set\n  @param[in]  count  Number of values to be set\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
     pub fn hipMemsetD8(
         dest: hipDeviceptr_t,
         value: ::std::os::raw::c_uchar,
@@ -4589,7 +4874,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant\n byte value value.\n\n hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the\n memset is complete. The operation can optionally be associated to a stream by passing a non-zero\n stream argument. If stream is non-zero, the operation may overlap with operations in other\n streams.\n\n  @param[out] dst Data ptr to be filled\n  @param[in]  constant value to be set\n  @param[in]  number of values to be set\n  @param[in]  stream - Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
+    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant\n byte value value.\n\n hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the\n memset is complete. The operation can optionally be associated to a stream by passing a non-zero\n stream argument. If stream is non-zero, the operation may overlap with operations in other\n streams.\n\n  @param[out] dest  Data ptr to be filled\n  @param[in]  value  Constant value to be set\n  @param[in]  count  Number of values to be set\n  @param[in]  stream  Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
     pub fn hipMemsetD8Async(
         dest: hipDeviceptr_t,
         value: ::std::os::raw::c_uchar,
@@ -4599,7 +4884,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant\n short value value.\n\n  @param[out] dst Data ptr to be filled\n  @param[in]  constant value to be set\n  @param[in]  number of values to be set\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
+    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant\n short value value.\n\n  @param[out] dest  Data ptr to be filled\n  @param[in]  value  Constant value to be set\n  @param[in]  count  Number of values to be set\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
     pub fn hipMemsetD16(
         dest: hipDeviceptr_t,
         value: ::std::os::raw::c_ushort,
@@ -4608,7 +4893,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant\n short value value.\n\n hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the\n memset is complete. The operation can optionally be associated to a stream by passing a non-zero\n stream argument. If stream is non-zero, the operation may overlap with operations in other\n streams.\n\n  @param[out] dst Data ptr to be filled\n  @param[in]  constant value to be set\n  @param[in]  number of values to be set\n  @param[in]  stream - Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
+    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant\n short value value.\n\n hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the\n memset is complete. The operation can optionally be associated to a stream by passing a non-zero\n stream argument. If stream is non-zero, the operation may overlap with operations in other\n streams.\n\n  @param[out] dest  Data ptr to be filled\n  @param[in]  value  Constant value to be set\n  @param[in]  count  Number of values to be set\n  @param[in]  stream  Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
     pub fn hipMemsetD16Async(
         dest: hipDeviceptr_t,
         value: ::std::os::raw::c_ushort,
@@ -4618,7 +4903,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills the memory area pointed to by dest with the constant integer\n value for specified number of times.\n\n  @param[out] dst Data being filled\n  @param[in]  constant value to be set\n  @param[in]  number of values to be set\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
+    #[doc = "  @brief Fills the memory area pointed to by dest with the constant integer\n value for specified number of times.\n\n  @param[out] dest  Data being filled\n  @param[in]  value  Constant value to be set\n  @param[in]  count  Number of values to be set\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized"]
     pub fn hipMemsetD32(
         dest: hipDeviceptr_t,
         value: ::std::os::raw::c_int,
@@ -4627,7 +4912,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant\n byte value value.\n\n hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the\n memset is complete. The operation can optionally be associated to a stream by passing a non-zero\n stream argument. If stream is non-zero, the operation may overlap with operations in other\n streams.\n\n  @param[out] dst Pointer to device memory\n  @param[in]  value - Value to set for each byte of specified memory\n  @param[in]  sizeBytes - Size in bytes to set\n  @param[in]  stream - Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree"]
+    #[doc = "  @brief Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant\n byte value value.\n\n hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the\n memset is complete. The operation can optionally be associated to a stream by passing a non-zero\n stream argument. If stream is non-zero, the operation may overlap with operations in other\n streams.\n\n  @param[out] dst Pointer to device memory\n  @param[in]  value  Value to set for each byte of specified memory\n  @param[in]  sizeBytes  Size in bytes to set\n  @param[in]  stream  Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipMemsetAsync(
         dst: *mut ::std::os::raw::c_void,
         value: ::std::os::raw::c_int,
@@ -4637,7 +4922,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills the memory area pointed to by dev with the constant integer\n value for specified number of times.\n\n  hipMemsetD32Async() is asynchronous with respect to the host, so the call may return before the\n memset is complete. The operation can optionally be associated to a stream by passing a non-zero\n stream argument. If stream is non-zero, the operation may overlap with operations in other\n streams.\n\n  @param[out] dst Pointer to device memory\n  @param[in]  value - Value to set for each byte of specified memory\n  @param[in]  count - number of values to be set\n  @param[in]  stream - Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree"]
+    #[doc = "  @brief Fills the memory area pointed to by dst with the constant integer\n value for specified number of times.\n\n  hipMemsetD32Async() is asynchronous with respect to the host, so the call may return before the\n memset is complete. The operation can optionally be associated to a stream by passing a non-zero\n stream argument. If stream is non-zero, the operation may overlap with operations in other\n streams.\n\n  @param[out] dst Pointer to device memory\n  @param[in]  value  Value to set for each byte of specified memory\n  @param[in]  count  Number of values to be set\n  @param[in]  stream  Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipMemsetD32Async(
         dst: hipDeviceptr_t,
         value: ::std::os::raw::c_int,
@@ -4647,7 +4932,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills the memory area pointed to by dst with the constant value.\n\n  @param[out] dst Pointer to device memory\n  @param[in]  pitch - data size in bytes\n  @param[in]  value - constant value to be set\n  @param[in]  width\n  @param[in]  height\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree"]
+    #[doc = "  @brief Fills the memory area pointed to by dst with the constant value.\n\n  @param[out] dst Pointer to device memory\n  @param[in]  pitch  Data size in bytes\n  @param[in]  value  Constant value to be set\n  @param[in]  width\n  @param[in]  height\n  @return #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipMemset2D(
         dst: *mut ::std::os::raw::c_void,
         pitch: usize,
@@ -4658,7 +4943,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills asynchronously the memory area pointed to by dst with the constant value.\n\n  @param[in]  dst Pointer to device memory\n  @param[in]  pitch - data size in bytes\n  @param[in]  value - constant value to be set\n  @param[in]  width\n  @param[in]  height\n  @param[in]  stream\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree"]
+    #[doc = "  @brief Fills asynchronously the memory area pointed to by dst with the constant value.\n\n  @param[in]  dst Pointer to 2D device memory\n  @param[in]  pitch  Pitch size in bytes\n  @param[in]  value  Value to be set for each byte of specified memory\n  @param[in]  width  Width of matrix set columns in bytes\n  @param[in]  height  Height of matrix set rows in bytes\n  @param[in]  stream  Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipMemset2DAsync(
         dst: *mut ::std::os::raw::c_void,
         pitch: usize,
@@ -4670,7 +4955,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.\n\n  @param[in] pitchedDevPtr\n  @param[in]  value - constant value to be set\n  @param[in]  extent\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree"]
+    #[doc = "  @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.\n\n  @param[in] pitchedDevPtr  Pointer to pitched device memory\n  @param[in]  value  Value to set for each byte of specified memory\n  @param[in]  extent  Size parameters for width field in bytes in device memory\n  @return #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipMemset3D(
         pitchedDevPtr: hipPitchedPtr,
         value: ::std::os::raw::c_int,
@@ -4679,7 +4964,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.\n\n  @param[in] pitchedDevPtr\n  @param[in]  value - constant value to be set\n  @param[in]  extent\n  @param[in]  stream\n  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree"]
+    #[doc = "  @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.\n\n  @param[in] pitchedDevPtr  Pointer to pitched device memory\n  @param[in]  value  Value to set for each byte of specified memory\n  @param[in]  extent  Size parameters for width field in bytes in device memory\n  @param[in]  stream  Stream identifier\n  @return #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipMemset3DAsync(
         pitchedDevPtr: hipPitchedPtr,
         value: ::std::os::raw::c_int,
@@ -4701,7 +4986,7 @@ extern "C" {
     #[must_use]
     #[doc = "  @brief Allocate an array on the device.\n\n  @param[out]  array  Pointer to allocated array in device memory\n  @param[in]   desc   Requested channel format\n  @param[in]   width  Requested array allocation width\n  @param[in]   height Requested array allocation height\n  @param[in]   flags  Requested properties of allocated array\n  @return      #hipSuccess, #hipErrorOutOfMemory\n\n  @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree"]
     pub fn hipMallocArray(
-        array: *mut *mut hipArray,
+        array: *mut hipArray_t,
         desc: *const hipChannelFormatDesc,
         width: usize,
         height: usize,
@@ -4712,20 +4997,20 @@ extern "C" {
     #[must_use]
     #[doc = "  @brief Create an array memory pointer on the device.\n\n  @param[out]  pHandle  Pointer to the array memory\n  @param[in]   pAllocateArray   Requested array desciptor\n\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n  @see hipMallocArray, hipArrayDestroy, hipFreeArray"]
     pub fn hipArrayCreate(
-        pHandle: *mut *mut hipArray,
+        pHandle: *mut hipArray_t,
         pAllocateArray: *const HIP_ARRAY_DESCRIPTOR,
     ) -> hipError_t;
 }
 extern "C" {
     #[must_use]
     #[doc = "  @brief Destroy an array memory pointer on the device.\n\n  @param[in]  array  Pointer to the array memory\n\n  @return      #hipSuccess, #hipErrorInvalidValue\n\n  @see hipArrayCreate, hipArrayDestroy, hipFreeArray"]
-    pub fn hipArrayDestroy(array: *mut hipArray) -> hipError_t;
+    pub fn hipArrayDestroy(array: hipArray_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
     #[doc = "  @brief Create a 3D array memory pointer on the device.\n\n  @param[out]  array  Pointer to the 3D array memory\n  @param[in]   pAllocateArray   Requested array desciptor\n\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n  @see hipMallocArray, hipArrayDestroy, hipFreeArray"]
     pub fn hipArray3DCreate(
-        array: *mut *mut hipArray,
+        array: *mut hipArray_t,
         pAllocateArray: *const HIP_ARRAY3D_DESCRIPTOR,
     ) -> hipError_t;
 }
@@ -4737,13 +5022,13 @@ extern "C" {
 extern "C" {
     #[must_use]
     #[doc = "  @brief Frees an array on the device.\n\n  @param[in]  array  Pointer to array to free\n  @return     #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized\n\n  @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipHostMalloc, hipHostFree"]
-    pub fn hipFreeArray(array: *mut hipArray) -> hipError_t;
+    pub fn hipFreeArray(array: hipArray_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
     #[doc = "  @brief Allocate an array on the device.\n\n  @param[out]  array  Pointer to allocated array in device memory\n  @param[in]   desc   Requested channel format\n  @param[in]   extent Requested array allocation width, height and depth\n  @param[in]   flags  Requested properties of allocated array\n  @return      #hipSuccess, #hipErrorOutOfMemory\n\n  @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree"]
     pub fn hipMalloc3DArray(
-        array: *mut *mut hipArray,
+        array: *mut hipArray_t,
         desc: *const hipChannelFormatDesc,
         extent: hipExtent,
         flags: ::std::os::raw::c_uint,
@@ -4756,23 +5041,23 @@ extern "C" {
         desc: *mut hipChannelFormatDesc,
         extent: *mut hipExtent,
         flags: *mut ::std::os::raw::c_uint,
-        array: *mut hipArray,
+        array: hipArray_t,
     ) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets a 1D or 2D array descriptor\n\n @param[out] pArrayDescriptor - Returned array descriptor\n @param[in]  array            - Array to get descriptor of\n\n @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidValue #hipErrorInvalidHandle\n\n @see hipArray3DCreate, hipArray3DGetDescriptor, hipArrayCreate, hipArrayDestroy, hipMemAlloc,\n hipMemAllocHost, hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned,\n hipMemcpy3D, hipMemcpy3DAsync, hipMemcpyAtoA, hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync,\n hipMemcpyDtoA, hipMemcpyDtoD, hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync,\n hipMemcpyHtoA, hipMemcpyHtoAAsync, hipMemcpyHtoD, hipMemcpyHtoDAsync, hipMemFree,\n hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, hipMemHostAlloc,\n hipMemHostGetDevicePointer, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipArrayGetInfo"]
+    #[doc = " @brief Gets a 1D or 2D array descriptor\n\n @param[out] pArrayDescriptor - Returned array descriptor\n @param[in]  array            - Array to get descriptor of\n\n @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidValue, #hipErrorInvalidHandle\n\n @see hipArray3DCreate, hipArray3DGetDescriptor, hipArrayCreate, hipArrayDestroy, hipMemAlloc,\n hipMemAllocHost, hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned,\n hipMemcpy3D, hipMemcpy3DAsync, hipMemcpyAtoA, hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync,\n hipMemcpyDtoA, hipMemcpyDtoD, hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync,\n hipMemcpyHtoA, hipMemcpyHtoAAsync, hipMemcpyHtoD, hipMemcpyHtoDAsync, hipMemFree,\n hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, hipMemHostAlloc,\n hipMemHostGetDevicePointer, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipArrayGetInfo"]
     pub fn hipArrayGetDescriptor(
         pArrayDescriptor: *mut HIP_ARRAY_DESCRIPTOR,
-        array: *mut hipArray,
+        array: hipArray_t,
     ) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets a 3D array descriptor\n\n @param[out] pArrayDescriptor - Returned 3D array descriptor\n @param[in]  array            - 3D array to get descriptor of\n\n @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidValue #hipErrorInvalidHandle, #hipErrorContextIsDestroyed\n\n @see hipArray3DCreate, hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc,\n hipMemAllocHost, hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned,\n hipMemcpy3D, hipMemcpy3DAsync, hipMemcpyAtoA, hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync,\n hipMemcpyDtoA, hipMemcpyDtoD, hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync,\n hipMemcpyHtoA, hipMemcpyHtoAAsync, hipMemcpyHtoD, hipMemcpyHtoDAsync, hipMemFree,\n hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, hipMemHostAlloc,\n hipMemHostGetDevicePointer, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipArrayGetInfo"]
+    #[doc = " @brief Gets a 3D array descriptor\n\n @param[out] pArrayDescriptor - Returned 3D array descriptor\n @param[in]  array            - 3D array to get descriptor of\n\n @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidValue, #hipErrorInvalidHandle, #hipErrorContextIsDestroyed\n\n @see hipArray3DCreate, hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc,\n hipMemAllocHost, hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned,\n hipMemcpy3D, hipMemcpy3DAsync, hipMemcpyAtoA, hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync,\n hipMemcpyDtoA, hipMemcpyDtoD, hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync,\n hipMemcpyHtoA, hipMemcpyHtoAAsync, hipMemcpyHtoD, hipMemcpyHtoDAsync, hipMemFree,\n hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, hipMemHostAlloc,\n hipMemHostGetDevicePointer, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipArrayGetInfo"]
     pub fn hipArray3DGetDescriptor(
         pArrayDescriptor: *mut HIP_ARRAY3D_DESCRIPTOR,
-        array: *mut hipArray,
+        array: hipArray_t,
     ) -> hipError_t;
 }
 extern "C" {
@@ -4816,7 +5101,7 @@ extern "C" {
     #[must_use]
     #[doc = "  @brief Copies data between host and device.\n\n  @param[in]   dst     Destination memory address\n  @param[in]   wOffset Destination starting X offset\n  @param[in]   hOffset Destination starting Y offset\n  @param[in]   src     Source memory address\n  @param[in]   spitch  Pitch of source memory\n  @param[in]   width   Width of matrix transfer (columns in bytes)\n  @param[in]   height  Height of matrix transfer (rows)\n  @param[in]   kind    Type of transfer\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,\n #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection\n\n  @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,\n hipMemcpyAsync"]
     pub fn hipMemcpy2DToArray(
-        dst: *mut hipArray,
+        dst: hipArray_t,
         wOffset: usize,
         hOffset: usize,
         src: *const ::std::os::raw::c_void,
@@ -4830,7 +5115,7 @@ extern "C" {
     #[must_use]
     #[doc = "  @brief Copies data between host and device.\n\n  @param[in]   dst     Destination memory address\n  @param[in]   wOffset Destination starting X offset\n  @param[in]   hOffset Destination starting Y offset\n  @param[in]   src     Source memory address\n  @param[in]   spitch  Pitch of source memory\n  @param[in]   width   Width of matrix transfer (columns in bytes)\n  @param[in]   height  Height of matrix transfer (rows)\n  @param[in]   kind    Type of transfer\n  @param[in]   stream    Accelerator view which the copy is being enqueued\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,\n #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection\n\n  @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,\n hipMemcpyAsync"]
     pub fn hipMemcpy2DToArrayAsync(
-        dst: *mut hipArray,
+        dst: hipArray_t,
         wOffset: usize,
         hOffset: usize,
         src: *const ::std::os::raw::c_void,
@@ -4843,9 +5128,9 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copies data between host and device.\n\n  @param[in]   dst     Destination memory address\n  @param[in]   wOffset Destination starting X offset\n  @param[in]   hOffset Destination starting Y offset\n  @param[in]   src     Source memory address\n  @param[in]   count   size in bytes to copy\n  @param[in]   kind    Type of transfer\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,\n #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection\n\n  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,\n hipMemcpyAsync\n\n @warning  This API is deprecated."]
+    #[doc = "  @brief Copies data between host and device.\n\n  @param[in]   dst     Destination memory address\n  @param[in]   wOffset Destination starting X offset\n  @param[in]   hOffset Destination starting Y offset\n  @param[in]   src     Source memory address\n  @param[in]   count   size in bytes to copy\n  @param[in]   kind    Type of transfer\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,\n #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection\n\n  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,\n  hipMemcpyAsync\n  @warning  This API is deprecated."]
     pub fn hipMemcpyToArray(
-        dst: *mut hipArray,
+        dst: hipArray_t,
         wOffset: usize,
         hOffset: usize,
         src: *const ::std::os::raw::c_void,
@@ -4855,7 +5140,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copies data between host and device.\n\n  @param[in]   dst       Destination memory address\n  @param[in]   srcArray  Source memory address\n  @param[in]   wOffset   Source starting X offset\n  @param[in]   hOffset   Source starting Y offset\n  @param[in]   count     Size in bytes to copy\n  @param[in]   kind      Type of transfer\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,\n #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection\n\n  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,\n hipMemcpyAsync\n\n @warning  This API is deprecated."]
+    #[doc = "  @brief Copies data between host and device.\n\n  @param[in]   dst       Destination memory address\n  @param[in]   srcArray  Source memory address\n  @param[in]   wOffset   Source starting X offset\n  @param[in]   hOffset   Source starting Y offset\n  @param[in]   count     Size in bytes to copy\n  @param[in]   kind      Type of transfer\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,\n #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection\n\n  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,\n hipMemcpyAsync\n @warning  This API is deprecated."]
     pub fn hipMemcpyFromArray(
         dst: *mut ::std::os::raw::c_void,
         srcArray: hipArray_const_t,
@@ -4896,10 +5181,10 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "  @brief Copies data between host and device.\n\n  @param[in]   dst       Destination memory address\n  @param[in]   srcArray  Source array\n  @param[in]   srcoffset Offset in bytes of source array\n  @param[in]   count     Size of memory copy in bytes\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,\n #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection\n\n  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,\n hipMemcpyAsync"]
+    #[doc = "  @brief Copies data between host and device.\n\n  @param[in]   dst       Destination memory address\n  @param[in]   srcArray  Source array\n  @param[in]   srcOffset Offset in bytes of source array\n  @param[in]   count     Size of memory copy in bytes\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,\n #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection\n\n  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,\n hipMemcpyAsync"]
     pub fn hipMemcpyAtoH(
         dst: *mut ::std::os::raw::c_void,
-        srcArray: *mut hipArray,
+        srcArray: hipArray_t,
         srcOffset: usize,
         count: usize,
     ) -> hipError_t;
@@ -4908,7 +5193,7 @@ extern "C" {
     #[must_use]
     #[doc = "  @brief Copies data between host and device.\n\n  @param[in]   dstArray   Destination memory address\n  @param[in]   dstOffset  Offset in bytes of destination array\n  @param[in]   srcHost    Source host pointer\n  @param[in]   count      Size of memory copy in bytes\n  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,\n #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection\n\n  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,\n hipMemcpyAsync"]
     pub fn hipMemcpyHtoA(
-        dstArray: *mut hipArray,
+        dstArray: hipArray_t,
         dstOffset: usize,
         srcHost: *const ::std::os::raw::c_void,
         count: usize,
@@ -4936,7 +5221,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @}\n/\n/**\n-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup PeerToPeer PeerToPeer Device Memory Access\n  @{\n  @warning PeerToPeer support is experimental.\n  This section describes the PeerToPeer device memory access functions of HIP runtime API.\n/\n/**\n @brief Determine if a device can access a peer's memory.\n\n @param [out] canAccessPeer Returns the peer access capability (0 or 1)\n @param [in] device - device from where memory may be accessed.\n @param [in] peerDevice - device where memory is physically located\n\n Returns \"1\" in @p canAccessPeer if the specified @p device is capable\n of directly accessing memory physically located on peerDevice , or \"0\" if not.\n\n Returns \"0\" in @p canAccessPeer if deviceId == peerDeviceId, and both are valid devices : a\n device is not a peer of itself.\n\n @returns #hipSuccess,\n @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices"]
+    #[doc = " @}\n/\n/**\n-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup PeerToPeer PeerToPeer Device Memory Access\n  @{\n  @warning PeerToPeer support is experimental.\n  This section describes the PeerToPeer device memory access functions of HIP runtime API.\n/\n/**\n @brief Determine if a device can access a peer's memory.\n\n @param [out] canAccessPeer Returns the peer access capability (0 or 1)\n @param [in] deviceId - device from where memory may be accessed.\n @param [in] peerDeviceId - device where memory is physically located\n\n Returns \"1\" in @p canAccessPeer if the specified @p deviceId is capable\n of directly accessing memory physically located on @p peerDeviceId, or \"0\" if not.\n\n Returns \"0\" in @p canAccessPeer if deviceId == peerDeviceId, and both are valid devices : a\n device is not a peer of itself.\n\n @returns #hipSuccess,\n @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices"]
     pub fn hipDeviceCanAccessPeer(
         canAccessPeer: *mut ::std::os::raw::c_int,
         deviceId: ::std::os::raw::c_int,
@@ -4945,7 +5230,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Enable direct access from current device's virtual address space to memory allocations\n physically located on a peer device.\n\n Memory which already allocated on peer device will be mapped into the address space of the\n current device.  In addition, all future memory allocations on peerDeviceId will be mapped into\n the address space of the current device when the memory is allocated. The peer memory remains\n accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset.\n\n\n @param [in] peerDeviceId\n @param [in] flags\n\n Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue,\n @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device."]
+    #[doc = " @brief Enable direct access from current device's virtual address space to memory allocations\n physically located on a peer device.\n\n Memory which is already allocated on the peer device will be mapped into the address space of the\n current device.  In addition, all future memory allocations on peerDeviceId will be mapped into\n the address space of the current device when the memory is allocated. The peer memory remains\n accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset.\n\n\n @param [in] peerDeviceId  Peer device to enable direct access to from the current device\n @param [in] flags  Reserved for future use, must be zero\n\n Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue,\n @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device."]
     pub fn hipDeviceEnablePeerAccess(
         peerDeviceId: ::std::os::raw::c_int,
         flags: ::std::os::raw::c_uint,
@@ -4953,7 +5238,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Disable direct access from current device's virtual address space to memory allocations\n physically located on a peer device.\n\n Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been\n enabled from the current device.\n\n @param [in] peerDeviceId\n\n @returns #hipSuccess, #hipErrorPeerAccessNotEnabled"]
+    #[doc = " @brief Disable direct access from current device's virtual address space to memory allocations\n physically located on a peer device.\n\n Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been\n enabled from the current device.\n\n @param [in] peerDeviceId  Peer device to disable direct access to\n\n @returns #hipSuccess, #hipErrorPeerAccessNotEnabled"]
     pub fn hipDeviceDisablePeerAccess(peerDeviceId: ::std::os::raw::c_int) -> hipError_t;
 }
 extern "C" {
@@ -4978,7 +5263,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Copies memory from one device to memory on another device.\n\n @param [out] dst - Destination device pointer.\n @param [in] dstDevice - Destination device\n @param [in] src - Source device pointer\n @param [in] srcDevice - Source device\n @param [in] sizeBytes - Size of memory copy in bytes\n @param [in] stream - Stream identifier\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice"]
+    #[doc = " @brief Copies memory from one device to memory on another device.\n\n @param [out] dst - Destination device pointer.\n @param [in] dstDeviceId - Destination device\n @param [in] src - Source device pointer\n @param [in] srcDevice - Source device\n @param [in] sizeBytes - Size of memory copy in bytes\n @param [in] stream - Stream identifier\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice"]
     pub fn hipMemcpyPeerAsync(
         dst: *mut ::std::os::raw::c_void,
         dstDeviceId: ::std::os::raw::c_int,
@@ -4990,7 +5275,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @}\n/\n/**\n-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup Context Context Management\n  @{\n  This section describes the context management functions of HIP runtime API.\n/\n/**\n\n  @addtogroup ContextD Context Management [Deprecated]\n  @{\n  @ingroup Context\n  This section describes the deprecated context management functions of HIP runtime API.\n/\n/**\n @brief Create a context and set it as current/default context.\n\n @param [out] ctx  Context to create\n @param [in] flags  Context creation flags\n @param [in] device  device handle\n\n @return #hipSuccess\n\n @see hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxPushCurrent,\n hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Create a context and set it as current/default context\n\n @param [out] ctx  Context to create\n @param [in] flags  Context creation flags\n @param [in] device  device handle\n\n @return #hipSuccess\n\n @see hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxPushCurrent,\n hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform.\n"]
     pub fn hipCtxCreate(
         ctx: *mut hipCtx_t,
         flags: ::std::os::raw::c_uint,
@@ -4999,83 +5284,83 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Destroy a HIP context.\n\n @param [in] ctx Context to destroy\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n @see hipCtxCreate, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,hipCtxSetCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Destroy a HIP context.\n\n @param [in] ctx Context to destroy\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n @see hipCtxCreate, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,hipCtxSetCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxDestroy(ctx: hipCtx_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Pop the current/default context and return the popped context.\n\n @param [out] ctx\n\n @returns #hipSuccess, #hipErrorInvalidContext\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxSetCurrent, hipCtxGetCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Pop the current/default context and return the popped context.\n\n @param [out] ctx  The current context to pop\n\n @returns #hipSuccess, #hipErrorInvalidContext\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxSetCurrent, hipCtxGetCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxPopCurrent(ctx: *mut hipCtx_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Push the context to be set as current/ default context.\n\n @param [in] ctx\n\n @returns #hipSuccess, #hipErrorInvalidContext\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Push the context to be set as current/ default context\n\n @param [in] ctx  The current context to push\n\n @returns #hipSuccess, #hipErrorInvalidContext\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxPushCurrent(ctx: hipCtx_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Set the passed context as current/default.\n\n @param [in] ctx\n\n @returns #hipSuccess, #hipErrorInvalidContext\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Set the passed context as current/default\n\n @param [in] ctx The context to set as current\n\n @returns #hipSuccess, #hipErrorInvalidContext\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxSetCurrent(ctx: hipCtx_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Get the handle of the current/ default context.\n\n @param [out] ctx\n\n @returns #hipSuccess, #hipErrorInvalidContext\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Get the handle of the current/ default context\n\n @param [out] ctx  The context to get as current\n\n @returns #hipSuccess, #hipErrorInvalidContext\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxGetCurrent(ctx: *mut hipCtx_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Get the handle of the device associated with current/default context.\n\n @param [out] device\n\n @returns #hipSuccess, #hipErrorInvalidContext\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Get the handle of the device associated with current/default context\n\n @param [out] device The device from the current context\n\n @returns #hipSuccess, #hipErrorInvalidContext\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxGetDevice(device: *mut hipDevice_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Returns the approximate HIP api version.\n\n @param [in]  ctx Context to check\n @param [out] apiVersion\n\n @return #hipSuccess\n\n @warning The HIP feature set does not correspond to an exact CUDA SDK api revision.\n This function always set *apiVersion to 4 as an approximation though HIP supports\n some features which were introduced in later CUDA SDK revisions.\n HIP apps code should not rely on the api revision number here and should\n use arch feature flags to test device capabilities or conditional compilation.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Returns the approximate HIP api version.\n\n @param [in]  ctx Context to check\n @param [out] apiVersion API version to get\n\n @return #hipSuccess\n\n @warning The HIP feature set does not correspond to an exact CUDA SDK api revision.\n This function always set *apiVersion to 4 as an approximation though HIP supports\n some features which were introduced in later CUDA SDK revisions.\n HIP apps code should not rely on the api revision number here and should\n use arch feature flags to test device capabilities or conditional compilation.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent,\n hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxGetApiVersion(ctx: hipCtx_t, apiVersion: *mut ::std::os::raw::c_int)
         -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Get Cache configuration for a specific function.\n\n @param [out] cacheConfig  Cache configuration\n\n @return #hipSuccess\n\n @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache.  This hint is\n ignored on those architectures.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Get Cache configuration for a specific function\n\n @param [out] cacheConfig  Cache configuration\n\n @return #hipSuccess\n\n @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache.  This hint is\n ignored on those architectures.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxGetCacheConfig(cacheConfig: *mut hipFuncCache_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Set L1/Shared cache partition.\n\n @param [in] cacheConfig  Cache configuration to set\n\n @return #hipSuccess\n\n @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache.  This hint is\n ignored on those architectures.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Set L1/Shared cache partition.\n\n @param [in] cacheConfig  Cache configuration to set\n\n @return #hipSuccess\n\n @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache.  This hint is\n ignored on those architectures.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxSetCacheConfig(cacheConfig: hipFuncCache_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Set Shared memory bank configuration.\n\n @param [in] config  Shared memory configuration to set\n\n @return #hipSuccess\n\n @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is\n ignored on those architectures.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Set Shared memory bank configuration.\n\n @param [in] config  Shared memory configuration to set\n\n @return #hipSuccess\n\n @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is\n ignored on those architectures.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxSetSharedMemConfig(config: hipSharedMemConfig) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Get Shared memory bank configuration.\n\n @param [out] pConfig  Pointer of shared memory configuration\n\n @return #hipSuccess\n\n @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is\n ignored on those architectures.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Get Shared memory bank configuration.\n\n @param [out] pConfig  Pointer of shared memory configuration\n\n @return #hipSuccess\n\n @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is\n ignored on those architectures.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxGetSharedMemConfig(pConfig: *mut hipSharedMemConfig) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Blocks until the default context has completed all preceding requested tasks.\n\n @return #hipSuccess\n\n @warning This function waits for all streams on the default context to complete execution, and\n then returns.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Blocks until the default context has completed all preceding requested tasks.\n\n @return #hipSuccess\n\n @warning This function waits for all streams on the default context to complete execution, and\n then returns.\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxSynchronize() -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Return flags used for creating default context.\n\n @param [out] flags  Pointer of flags\n\n @returns #hipSuccess\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Return flags used for creating default context.\n\n @param [out] flags  Pointer of flags\n\n @returns #hipSuccess\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxGetFlags(flags: *mut ::std::os::raw::c_uint) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Enables direct access to memory allocations in a peer context.\n\n Memory which already allocated on peer device will be mapped into the address space of the\n current device.  In addition, all future memory allocations on peerDeviceId will be mapped into\n the address space of the current device when the memory is allocated. The peer memory remains\n accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset.\n\n\n @param [in] peerCtx  Peer context\n @param [in] flags  flags, need to set as 0\n\n @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue,\n #hipErrorPeerAccessAlreadyEnabled\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n @warning PeerToPeer support is experimental.\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Enables direct access to memory allocations in a peer context.\n\n Memory which already allocated on peer device will be mapped into the address space of the\n current device.  In addition, all future memory allocations on peerDeviceId will be mapped into\n the address space of the current device when the memory is allocated. The peer memory remains\n accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset.\n\n\n @param [in] peerCtx  Peer context\n @param [in] flags  flags, need to set as 0\n\n @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue,\n #hipErrorPeerAccessAlreadyEnabled\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n @warning PeerToPeer support is experimental.\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxEnablePeerAccess(peerCtx: hipCtx_t, flags: ::std::os::raw::c_uint) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Disable direct access from current context's virtual address space to memory allocations\n physically located on a peer context.Disables direct access to memory allocations in a peer\n context and unregisters any registered allocations.\n\n Returns #hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been\n enabled from the current device.\n\n @param [in] peerCtx  Peer context to be disabled\n\n @returns #hipSuccess, #hipErrorPeerAccessNotEnabled\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n @warning PeerToPeer support is experimental.\n\n @warning : This HIP API is deprecated."]
+    #[doc = " @brief Disable direct access from current context's virtual address space to memory allocations\n physically located on a peer context. Disables direct access to memory allocations in a peer\n context and unregisters any registered allocations.\n\n Returns #hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been\n enabled from the current device.\n\n @param [in] peerCtx  Peer context to be disabled\n\n @returns #hipSuccess, #hipErrorPeerAccessNotEnabled\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n @warning PeerToPeer support is experimental.\n\n @warning  This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the\n NVIDIA platform."]
     pub fn hipCtxDisablePeerAccess(peerCtx: hipCtx_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @}\n/\n/**\n @brief Get the state of the primary context.\n\n @param [in] dev  Device to get primary context flags for\n @param [out] flags  Pointer to store flags\n @param [out] active  Pointer to store context state; 0 = inactive, 1 = active\n\n @returns #hipSuccess\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice"]
+    #[doc = " @brief Get the state of the primary context.\n\n @param [in] dev  Device to get primary context flags for\n @param [out] flags  Pointer to store flags\n @param [out] active  Pointer to store context state; 0 = inactive, 1 = active\n\n @returns #hipSuccess\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent driver API on the\n NVIDIA platform."]
     pub fn hipDevicePrimaryCtxGetState(
         dev: hipDevice_t,
         flags: *mut ::std::os::raw::c_uint,
@@ -5084,22 +5369,22 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Release the primary context on the GPU.\n\n @param [in] dev  Device which primary context is released\n\n @returns #hipSuccess\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n @warning This function return #hipSuccess though doesn't release the primaryCtx by design on\n HIP/HCC path."]
+    #[doc = " @brief Release the primary context on the GPU.\n\n @param [in] dev  Device which primary context is released\n\n @returns #hipSuccess\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n @warning This function returns #hipSuccess though doesn't release the primaryCtx by design on\n HIP/HCC path.\n\n @warning  This API is deprecated on the AMD platform, only for equivalent driver API on the NVIDIA\n platform."]
     pub fn hipDevicePrimaryCtxRelease(dev: hipDevice_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Retain the primary context on the GPU.\n\nhipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev);\n @param [out] pctx  Returned context handle of the new context\n @param [in] dev  Device which primary context is released\n\n @returns #hipSuccess\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice"]
+    #[doc = " @brief Retain the primary context on the GPU.\n\n @param [out] pctx  Returned context handle of the new context\n @param [in] dev  Device which primary context is released\n\n @returns #hipSuccess\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent driver API on the NVIDIA\n platform."]
     pub fn hipDevicePrimaryCtxRetain(pctx: *mut hipCtx_t, dev: hipDevice_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Resets the primary context on the GPU.\n\n @param [in] dev  Device which primary context is reset\n\n @returns #hipSuccess\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice"]
+    #[doc = " @brief Resets the primary context on the GPU.\n\n @param [in] dev  Device which primary context is reset\n\n @returns #hipSuccess\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent driver API on the NVIDIA\n platform."]
     pub fn hipDevicePrimaryCtxReset(dev: hipDevice_t) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Set flags for the primary context.\n\n @param [in] dev  Device for which the primary context flags are set\n @param [in] flags  New flags for the device\n\n @returns #hipSuccess, #hipErrorContextAlreadyInUse\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice"]
+    #[doc = " @brief Set flags for the primary context.\n\n @param [in] dev  Device for which the primary context flags are set\n @param [in] flags  New flags for the device\n\n @returns #hipSuccess, #hipErrorContextAlreadyInUse\n\n @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,\n hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice\n\n @warning  This API is deprecated on the AMD platform, only for equivalent driver API on the NVIDIA\n platform."]
     pub fn hipDevicePrimaryCtxSetFlags(
         dev: hipDevice_t,
         flags: ::std::os::raw::c_uint,
@@ -5155,7 +5440,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief builds module from code object which resides in host memory. Image is pointer to that\n location.\n\n @param [in] image\n @param [out] module\n\n @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized"]
+    #[doc = " @brief builds module from code object which resides in host memory. Image is pointer to that\n location.\n\n @param [in] image  The pointer to the location of data\n @param [out] module  Returned module\n\n @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized"]
     pub fn hipModuleLoadData(
         module: *mut hipModule_t,
         image: *const ::std::os::raw::c_void,
@@ -5163,7 +5448,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief builds module from code object which resides in host memory. Image is pointer to that\n location. Options are not used. hipModuleLoadData is called.\n\n @param [in] image\n @param [out] module\n @param [in] number of options\n @param [in] options for JIT\n @param [in] option values for JIT\n\n @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized"]
+    #[doc = " @brief builds module from code object which resides in host memory. Image is pointer to that\n location. Options are not used. hipModuleLoadData is called.\n\n @param [in] image  The pointer to the location of data\n @param [out] module  Returned module\n @param [in] numOptions Number of options\n @param [in] options Options for JIT\n @param [in] optionValues  Option values for JIT\n\n @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized"]
     pub fn hipModuleLoadDataEx(
         module: *mut hipModule_t,
         image: *const ::std::os::raw::c_void,
@@ -5174,7 +5459,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief launches kernel f with launch parameters and shared memory on stream with arguments passed\n to kernelparams or extra\n\n @param [in] f         Kernel to launch.\n @param [in] gridDimX  X grid dimension specified as multiple of blockDimX.\n @param [in] gridDimY  Y grid dimension specified as multiple of blockDimY.\n @param [in] gridDimZ  Z grid dimension specified as multiple of blockDimZ.\n @param [in] blockDimX X block dimensions specified in work-items\n @param [in] blockDimY Y grid dimension specified in work-items\n @param [in] blockDimZ Z grid dimension specified in work-items\n @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream    Stream where the kernel should be dispatched.  May be 0, in which case th\n default stream is used with associated synchronization rules.\n @param [in] kernelParams\n @param [in] extra     Pointer to kernel arguments.   These are passed directly to the kernel and\n must be in the memory layout and alignment expected by the kernel.\n All passed arguments must be naturally aligned according to their type. The memory address of each\n argument should be a multiple of its size in bytes. Please refer to hip_porting_driver_api.md\n for sample usage.\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32. So gridDim.x * blockDim.x, gridDim.y * blockDim.y\n and gridDim.z * blockDim.z are always less than 2^32.\n\n @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue"]
+    #[doc = " @brief launches kernel f with launch parameters and shared memory on stream with arguments passed\n to kernelparams or extra\n\n @param [in] f         Kernel to launch.\n @param [in] gridDimX  X grid dimension specified as multiple of blockDimX.\n @param [in] gridDimY  Y grid dimension specified as multiple of blockDimY.\n @param [in] gridDimZ  Z grid dimension specified as multiple of blockDimZ.\n @param [in] blockDimX X block dimensions specified in work-items\n @param [in] blockDimY Y block dimension specified in work-items\n @param [in] blockDimZ Z block dimension specified in work-items\n @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream    Stream where the kernel should be dispatched.  May be 0, in which case the\n default stream is used with associated synchronization rules.\n @param [in] kernelParams  Kernel parameters to launch\n @param [in] extra     Pointer to kernel arguments.   These are passed directly to the kernel and\n must be in the memory layout and alignment expected by the kernel.\n All passed arguments must be naturally aligned according to their type. The memory address of each\n argument should be a multiple of its size in bytes. Please refer to hip_porting_driver_api.md\n for sample usage.\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32. So gridDim.x * blockDim.x, gridDim.y * blockDim.y\n and gridDim.z * blockDim.z are always less than 2^32.\n\n @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue"]
     pub fn hipModuleLaunchKernel(
         f: hipFunction_t,
         gridDimX: ::std::os::raw::c_uint,
@@ -5191,7 +5476,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief launches kernel f with launch parameters and shared memory on stream with arguments passed\n to kernelParams, where thread blocks can cooperate and synchronize as they execute\n\n @param [in] f              Kernel to launch.\n @param [in] gridDimX       X grid dimension specified as multiple of blockDimX.\n @param [in] gridDimY       Y grid dimension specified as multiple of blockDimY.\n @param [in] gridDimZ       Z grid dimension specified as multiple of blockDimZ.\n @param [in] blockDimX      X block dimension specified in work-items.\n @param [in] blockDimY      Y block dimension specified in work-items.\n @param [in] blockDimZ      Z block dimension specified in work-items.\n @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream         Stream where the kernel should be dispatched. May be 0,\n in which case the default stream is used with associated synchronization rules.\n @param [in] kernelParams   A list of kernel arguments.\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns hipSuccess, hipErrorDeinitialized, hipErrorNotInitialized, hipErrorInvalidContext,\n hipErrorInvalidHandle, hipErrorInvalidImage, hipErrorInvalidValue, hipInvalidDevice,\n hipErrorInvalidConfiguration, hipErrorLaunchFailure, hipErrorLaunchOutOfResources,\n hipErrorLaunchTimeOut, hipErrorCooperativeLaunchTooLarge, hipErrorSharedObjectInitFailed"]
+    #[doc = " @brief launches kernel f with launch parameters and shared memory on stream with arguments passed\n to kernelParams, where thread blocks can cooperate and synchronize as they execute\n\n @param [in] f              Kernel to launch.\n @param [in] gridDimX       X grid dimension specified as multiple of blockDimX.\n @param [in] gridDimY       Y grid dimension specified as multiple of blockDimY.\n @param [in] gridDimZ       Z grid dimension specified as multiple of blockDimZ.\n @param [in] blockDimX      X block dimension specified in work-items.\n @param [in] blockDimY      Y block dimension specified in work-items.\n @param [in] blockDimZ      Z block dimension specified in work-items.\n @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream         Stream where the kernel should be dispatched. May be 0,\n in which case the default stream is used with associated synchronization rules.\n @param [in] kernelParams   A list of kernel arguments.\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidHandle, #hipErrorInvalidImage, #hipErrorInvalidValue,\n #hipErrorInvalidConfiguration, #hipErrorLaunchFailure, #hipErrorLaunchOutOfResources,\n #hipErrorLaunchTimeOut, #hipErrorCooperativeLaunchTooLarge, #hipErrorSharedObjectInitFailed"]
     pub fn hipModuleLaunchCooperativeKernel(
         f: hipFunction_t,
         gridDimX: ::std::os::raw::c_uint,
@@ -5207,7 +5492,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Launches kernels on multiple devices where thread blocks can cooperate and\n synchronize as they execute.\n\n @param [in] launchParamsList         List of launch parameters, one per device.\n @param [in] numDevices               Size of the launchParamsList array.\n @param [in] flags                    Flags to control launch behavior.\n\n @returns hipSuccess, hipErrorDeinitialized, hipErrorNotInitialized, hipErrorInvalidContext,\n hipErrorInvalidHandle, hipErrorInvalidImage, hipErrorInvalidValue, hipInvalidDevice,\n hipErrorInvalidConfiguration, hipErrorInvalidResourceHandle, hipErrorLaunchFailure,\n hipErrorLaunchOutOfResources, hipErrorLaunchTimeOut, hipErrorCooperativeLaunchTooLarge,\n hipErrorSharedObjectInitFailed"]
+    #[doc = " @brief Launches kernels on multiple devices where thread blocks can cooperate and\n synchronize as they execute.\n\n @param [in] launchParamsList         List of launch parameters, one per device.\n @param [in] numDevices               Size of the launchParamsList array.\n @param [in] flags                    Flags to control launch behavior.\n\n @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,\n #hipErrorInvalidHandle, #hipErrorInvalidImage, #hipErrorInvalidValue,\n #hipErrorInvalidConfiguration, #hipErrorInvalidResourceHandle, #hipErrorLaunchFailure,\n #hipErrorLaunchOutOfResources, #hipErrorLaunchTimeOut, #hipErrorCooperativeLaunchTooLarge,\n #hipErrorSharedObjectInitFailed"]
     pub fn hipModuleLaunchCooperativeKernelMultiDevice(
         launchParamsList: *mut hipFunctionLaunchParams,
         numDevices: ::std::os::raw::c_uint,
@@ -5216,7 +5501,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief launches kernel f with launch parameters and shared memory on stream with arguments passed\n to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute\n\n @param [in] f         Kernel to launch.\n @param [in] gridDim   Grid dimensions specified as multiple of blockDim.\n @param [in] blockDim  Block dimensions specified in work-items\n @param [in] kernelParams A list of kernel arguments\n @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream    Stream where the kernel should be dispatched.  May be 0, in which case th\n default stream is used with associated synchronization rules.\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge"]
+    #[doc = " @brief launches kernel f with launch parameters and shared memory on stream with arguments passed\n to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute\n\n @param [in] f         Kernel to launch.\n @param [in] gridDim   Grid dimensions specified as multiple of blockDim.\n @param [in] blockDimX  Block dimensions specified in work-items\n @param [in] kernelParams A list of kernel arguments\n @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream    Stream where the kernel should be dispatched.  May be 0, in which case the\n default stream is used with associated synchronization rules.\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, #hipErrorCooperativeLaunchTooLarge"]
     pub fn hipLaunchCooperativeKernel(
         f: *const ::std::os::raw::c_void,
         gridDim: dim3,
@@ -5228,7 +5513,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Launches kernels on multiple devices where thread blocks can cooperate and\n synchronize as they execute.\n\n @param [in] launchParamsList         List of launch parameters, one per device.\n @param [in] numDevices               Size of the launchParamsList array.\n @param [in] flags                    Flags to control launch behavior.\n\n @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge"]
+    #[doc = " @brief Launches kernels on multiple devices where thread blocks can cooperate and\n synchronize as they execute.\n\n @param [in] launchParamsList         List of launch parameters, one per device.\n @param [in] numDevices               Size of the launchParamsList array.\n @param [in] flags                    Flags to control launch behavior.\n\n @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue,\n  #hipErrorCooperativeLaunchTooLarge"]
     pub fn hipLaunchCooperativeKernelMultiDevice(
         launchParamsList: *mut hipLaunchParams,
         numDevices: ::std::os::raw::c_int,
@@ -5237,7 +5522,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched\n on respective streams before enqueuing any other work on the specified streams from any other threads\n\n\n @param [in] hipLaunchParams          List of launch parameters, one per device.\n @param [in] numDevices               Size of the launchParamsList array.\n @param [in] flags                    Flags to control launch behavior.\n\n @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue"]
+    #[doc = " @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched\n on respective streams before enqueuing any other work on the specified streams from any other threads\n\n\n @param [in] launchParamsList          List of launch parameters, one per device.\n @param [in] numDevices               Size of the launchParamsList array.\n @param [in] flags                    Flags to control launch behavior.\n\n @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue"]
     pub fn hipExtLaunchMultiKernelMultiDevice(
         launchParamsList: *mut hipLaunchParams,
         numDevices: ::std::os::raw::c_int,
@@ -5246,7 +5531,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = "-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup Occupancy Occupancy\n  @{\n  This section describes the occupancy functions of HIP runtime API.\n\n/\n/**\n @brief determine the grid and block sizes to achieves maximum occupancy for a kernel\n\n @param [out] gridSize           minimum grid size for maximum potential occupancy\n @param [out] blockSize          block size for maximum potential occupancy\n @param [in]  f                  kernel function for which occupancy is calulated\n @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block\n @param [in]  blockSizeLimit     the maximum block size for the kernel, use 0 for no limit\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue"]
+    #[doc = "-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup Occupancy Occupancy\n  @{\n  This section describes the occupancy functions of HIP runtime API.\n\n/\n/**\n @brief determine the grid and block sizes to achieves maximum occupancy for a kernel\n\n @param [out] gridSize           minimum grid size for maximum potential occupancy\n @param [out] blockSize          block size for maximum potential occupancy\n @param [in]  f                  kernel function for which occupancy is calulated\n @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block\n @param [in]  blockSizeLimit     the maximum block size for the kernel, use 0 for no limit\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipModuleOccupancyMaxPotentialBlockSize(
         gridSize: *mut ::std::os::raw::c_int,
         blockSize: *mut ::std::os::raw::c_int,
@@ -5257,7 +5542,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief determine the grid and block sizes to achieves maximum occupancy for a kernel\n\n @param [out] gridSize           minimum grid size for maximum potential occupancy\n @param [out] blockSize          block size for maximum potential occupancy\n @param [in]  f                  kernel function for which occupancy is calulated\n @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block\n @param [in]  blockSizeLimit     the maximum block size for the kernel, use 0 for no limit\n @param [in]  flags            Extra flags for occupancy calculation (only default supported)\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue"]
+    #[doc = " @brief determine the grid and block sizes to achieve maximum occupancy for a kernel\n\n @param [out] gridSize           minimum grid size for maximum potential occupancy\n @param [out] blockSize          block size for maximum potential occupancy\n @param [in]  f                  kernel function for which occupancy is calculated\n @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block\n @param [in]  blockSizeLimit     the maximum block size for the kernel, use 0 for no limit\n @param [in]  flags            Extra flags for occupancy calculation (only default supported)\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipModuleOccupancyMaxPotentialBlockSizeWithFlags(
         gridSize: *mut ::std::os::raw::c_int,
         blockSize: *mut ::std::os::raw::c_int,
@@ -5269,7 +5554,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Returns occupancy for a device function.\n\n @param [out] numBlocks        Returned occupancy\n @param [in]  func             Kernel function (hipFunction) for which occupancy is calulated\n @param [in]  blockSize        Block size the kernel is intended to be launched with\n @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block"]
+    #[doc = " @brief Returns occupancy for a device function.\n\n @param [out] numBlocks        Returned occupancy\n @param [in]  f                Kernel function (hipFunction) for which occupancy is calculated\n @param [in]  blockSize        Block size the kernel is intended to be launched with\n @param [in]  dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block\n @returns  #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(
         numBlocks: *mut ::std::os::raw::c_int,
         f: hipFunction_t,
@@ -5279,7 +5564,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Returns occupancy for a device function.\n\n @param [out] numBlocks        Returned occupancy\n @param [in]  f                Kernel function(hipFunction_t) for which occupancy is calulated\n @param [in]  blockSize        Block size the kernel is intended to be launched with\n @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block\n @param [in]  flags            Extra flags for occupancy calculation (only default supported)"]
+    #[doc = " @brief Returns occupancy for a device function.\n\n @param [out] numBlocks        Returned occupancy\n @param [in]  f                Kernel function(hipFunction_t) for which occupancy is calculated\n @param [in]  blockSize        Block size the kernel is intended to be launched with\n @param [in]  dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block\n @param [in]  flags            Extra flags for occupancy calculation (only default supported)\n @returns  #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
         numBlocks: *mut ::std::os::raw::c_int,
         f: hipFunction_t,
@@ -5290,7 +5575,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Returns occupancy for a device function.\n\n @param [out] numBlocks        Returned occupancy\n @param [in]  func             Kernel function for which occupancy is calulated\n @param [in]  blockSize        Block size the kernel is intended to be launched with\n @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block"]
+    #[doc = " @brief Returns occupancy for a device function.\n\n @param [out] numBlocks        Returned occupancy\n @param [in]  f                Kernel function for which occupancy is calculated\n @param [in]  blockSize        Block size the kernel is intended to be launched with\n @param [in]  dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block\n @returns  #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue"]
     pub fn hipOccupancyMaxActiveBlocksPerMultiprocessor(
         numBlocks: *mut ::std::os::raw::c_int,
         f: *const ::std::os::raw::c_void,
@@ -5300,7 +5585,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Returns occupancy for a device function.\n\n @param [out] numBlocks        Returned occupancy\n @param [in]  f                Kernel function for which occupancy is calulated\n @param [in]  blockSize        Block size the kernel is intended to be launched with\n @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block\n @param [in]  flags            Extra flags for occupancy calculation (currently ignored)"]
+    #[doc = " @brief Returns occupancy for a device function.\n\n @param [out] numBlocks        Returned occupancy\n @param [in]  f                Kernel function for which occupancy is calculated\n @param [in]  blockSize        Block size the kernel is intended to be launched with\n @param [in]  dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block\n @param [in]  flags            Extra flags for occupancy calculation (currently ignored)\n @returns  #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue"]
     pub fn hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
         numBlocks: *mut ::std::os::raw::c_int,
         f: *const ::std::os::raw::c_void,
@@ -5311,7 +5596,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief determine the grid and block sizes to achieves maximum occupancy for a kernel\n\n @param [out] gridSize           minimum grid size for maximum potential occupancy\n @param [out] blockSize          block size for maximum potential occupancy\n @param [in]  f                  kernel function for which occupancy is calulated\n @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block\n @param [in]  blockSizeLimit     the maximum block size for the kernel, use 0 for no limit\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue"]
+    #[doc = " @brief determine the grid and block sizes to achieve maximum occupancy for a kernel\n\n @param [out] gridSize           minimum grid size for maximum potential occupancy\n @param [out] blockSize          block size for maximum potential occupancy\n @param [in]  f                  kernel function for which occupancy is calculated\n @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block\n @param [in]  blockSizeLimit     the maximum block size for the kernel, use 0 for no limit\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns #hipSuccess, #hipErrorInvalidValue"]
     pub fn hipOccupancyMaxPotentialBlockSize(
         gridSize: *mut ::std::os::raw::c_int,
         blockSize: *mut ::std::os::raw::c_int,
@@ -5322,17 +5607,17 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Start recording of profiling information.\n When using this API, start the profiler with profiling disabled.  (--startdisabled)\n @warning  hipProfilerStart API is deprecated, use roctracer/rocTX instead."]
+    #[doc = " @brief Start recording of profiling information.\n When using this API, start the profiler with profiling disabled.  (--startdisabled)\n @returns  #hipErrorNotSupported\n @warning  hipProfilerStart API is deprecated, use roctracer/rocTX instead."]
     pub fn hipProfilerStart() -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Stop recording of profiling information.\n When using this API, start the profiler with profiling disabled.  (--startdisabled)\n @warning  hipProfilerStop API is deprecated, use roctracer/rocTX instead."]
+    #[doc = " @brief Stop recording of profiling information.\n When using this API, start the profiler with profiling disabled.  (--startdisabled)\n @returns  #hipErrorNotSupported\n @warning  hipProfilerStop API is deprecated, use roctracer/rocTX instead."]
     pub fn hipProfilerStop() -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @}\n/\n/**\n-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup Clang Launch API to support the triple-chevron syntax\n  @{\n  This section describes the API to support the triple-chevron syntax.\n/\n/**\n @brief Configure a kernel launch.\n\n @param [in] gridDim   grid dimension specified as multiple of blockDim.\n @param [in] blockDim  block dimensions specified in work-items\n @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream    Stream where the kernel should be dispatched.  May be 0, in which case the\n default stream is used with associated synchronization rules.\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue\n"]
+    #[doc = " @}\n/\n/**\n-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup Clang Launch API to support the triple-chevron syntax\n  @{\n  This section describes the API to support the triple-chevron syntax.\n/\n/**\n @brief Configure a kernel launch.\n\n @param [in] gridDim   grid dimension specified as multiple of blockDim.\n @param [in] blockDim  block dimensions specified in work-items\n @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream    Stream where the kernel should be dispatched.  May be 0, in which case the\n default stream is used with associated synchronization rules.\n\n Please note, HIP does not support kernel launch with total work items defined in dimension with\n size gridDim x blockDim >= 2^32.\n\n @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue\n"]
     pub fn hipConfigureCall(
         gridDim: dim3,
         blockDim: dim3,
@@ -5342,7 +5627,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Set a kernel argument.\n\n @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue\n\n @param [in] arg    Pointer the argument in host memory.\n @param [in] size   Size of the argument.\n @param [in] offset Offset of the argument on the argument stack.\n"]
+    #[doc = " @brief Set a kernel argument.\n\n @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue\n\n @param [in] arg    Pointer the argument in host memory.\n @param [in] size   Size of the argument.\n @param [in] offset Offset of the argument on the argument stack.\n"]
     pub fn hipSetupArgument(
         arg: *const ::std::os::raw::c_void,
         size: usize,
@@ -5351,12 +5636,12 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Launch a kernel.\n\n @param [in] func Kernel to launch.\n\n @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue\n"]
+    #[doc = " @brief Launch a kernel.\n\n @param [in] func Kernel to launch.\n\n @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue\n"]
     pub fn hipLaunchByPtr(func: *const ::std::os::raw::c_void) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief C compliant kernel launch API\n\n @param [in] function_address - kernel stub function pointer.\n @param [in] numBlocks - number of blocks\n @param [in] dimBlocks - dimension of a block\n @param [in] args - kernel arguments\n @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream - Stream where the kernel should be dispatched.  May be 0, in which case th\n  default stream is used with associated synchronization rules.\n\n @returns #hipSuccess, #hipErrorInvalidValue, hipInvalidDevice\n"]
+    #[doc = " @brief C compliant kernel launch API\n\n @param [in] function_address - kernel stub function pointer.\n @param [in] numBlocks - number of blocks\n @param [in] dimBlocks - dimension of a block\n @param [in] args - kernel arguments\n @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream - Stream where the kernel should be dispatched.  May be 0, in which case the\n  default stream is used with associated synchronization rules.\n\n @returns #hipSuccess, #hipErrorInvalidValue\n"]
     pub fn hipLaunchKernel(
         function_address: *const ::std::os::raw::c_void,
         numBlocks: dim3,
@@ -5382,7 +5667,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Launches kernel from the pointer address, with arguments and shared memory on stream.\n\n @param [in] function_address pointer to the Kernel to launch.\n @param [in] numBlocks number of blocks.\n @param [in] dimBlocks dimension of a block.\n @param [in] args pointer to kernel arguments.\n @param [in] sharedMemBytes  Amount of dynamic shared memory to allocate for this kernel.\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream  Stream where the kernel should be dispatched.\n May be 0, in which case the default stream is used with associated synchronization rules.\n @param [in] startEvent  If non-null, specified event will be updated to track the start time of\n the kernel launch. The event must be created before calling this API.\n @param [in] stopEvent  If non-null, specified event will be updated to track the stop time of\n the kernel launch. The event must be created before calling this API.\n @param [in] flags. The value of hipExtAnyOrderLaunch, signifies if kernel can be\n launched in any order.\n @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue.\n"]
+    #[doc = " @brief Launches kernel from the pointer address, with arguments and shared memory on stream.\n\n @param [in] function_address pointer to the Kernel to launch.\n @param [in] numBlocks number of blocks.\n @param [in] dimBlocks dimension of a block.\n @param [in] args pointer to kernel arguments.\n @param [in] sharedMemBytes  Amount of dynamic shared memory to allocate for this kernel.\n HIP-Clang compiler provides support for extern shared declarations.\n @param [in] stream  Stream where the kernel should be dispatched.\n May be 0, in which case the default stream is used with associated synchronization rules.\n @param [in] startEvent  If non-null, specified event will be updated to track the start time of\n the kernel launch. The event must be created before calling this API.\n @param [in] stopEvent  If non-null, specified event will be updated to track the stop time of\n the kernel launch. The event must be created before calling this API.\n @param [in] flags  The value of hipExtAnyOrderLaunch, signifies if kernel can be\n launched in any order.\n @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue.\n"]
     pub fn hipExtLaunchKernel(
         function_address: *const ::std::os::raw::c_void,
         numBlocks: dim3,
@@ -5546,6 +5831,22 @@ extern "C" {
         symbol: *const ::std::os::raw::c_void,
     ) -> hipError_t;
 }
+extern "C" {
+    #[must_use]
+    #[doc = " @brief Gets the border color used by a texture reference.\n\n @param [out] pBorderColor  Returned Type and Value of RGBA color.\n @param [in] texRef  Texture reference.\n\n @returns #hipSuccess, #hipErrorInvalidValue\n @warning This API is deprecated.\n"]
+    pub fn hipTexRefGetBorderColor(
+        pBorderColor: *mut f32,
+        texRef: *const textureReference,
+    ) -> hipError_t;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @brief Gets the array bound to a texture reference.\n\n\n @param [out] pArray  Returned array.\n @param [in] texRef  texture reference.\n\n @returns #hipSuccess, #hipErrorInvalidValue\n @warning This API is deprecated.\n"]
+    pub fn hipTexRefGetArray(
+        pArray: *mut hipArray_t,
+        texRef: *const textureReference,
+    ) -> hipError_t;
+}
 extern "C" {
     #[must_use]
     #[doc = " @brief Sets address mode for a texture reference.\n\n @param [in] texRef  texture reference.\n @param [in] dim  Dimension of the texture.\n @param [in] am  Value of the texture address mode.\n\n @returns #hipSuccess, #hipErrorInvalidValue\n @warning This API is deprecated.\n"]
@@ -5591,7 +5892,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Binds a memory area to a texture.\n\n @param [in] offset  Offset in bytes.\n @param [in] tex  Texture to bind.\n @param [in] devPtr  Pointer of memory on the device.\n @param [in] desc  Pointer of channel format descriptor.\n @param [in] size  Size of memory in bites.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Binds a memory area to a texture.\n\n @param [in] offset  Offset in bytes.\n @param [in] tex  Texture to bind.\n @param [in] devPtr  Pointer of memory on the device.\n @param [in] desc  Pointer of channel format descriptor.\n @param [in] size  Size of memory in bytes.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipBindTexture(
         offset: *mut usize,
         tex: *const textureReference,
@@ -5602,7 +5903,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Binds a 2D memory area to a texture.\n\n @param [in] offset  Offset in bytes.\n @param [in] tex  Texture to bind.\n @param [in] devPtr  Pointer of 2D memory area on the device.\n @param [in] desc  Pointer of channel format descriptor.\n @param [in] width  Width in texel units.\n @param [in] height  Height in texel units.\n @param [in] pitch  Pitch in bytes.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Binds a 2D memory area to a texture.\n\n @param [in] offset  Offset in bytes.\n @param [in] tex  Texture to bind.\n @param [in] devPtr  Pointer of 2D memory area on the device.\n @param [in] desc  Pointer of channel format descriptor.\n @param [in] width  Width in texel units.\n @param [in] height  Height in texel units.\n @param [in] pitch  Pitch in bytes.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipBindTexture2D(
         offset: *mut usize,
         tex: *const textureReference,
@@ -5615,7 +5916,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Binds a memory area to a texture.\n\n @param [in] tex  Pointer of texture reference.\n @param [in] array  Array to bind.\n @param [in] desc  Pointer of channel format descriptor.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Binds a memory area to a texture.\n\n @param [in] tex  Pointer of texture reference.\n @param [in] array  Array to bind.\n @param [in] desc  Pointer of channel format descriptor.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipBindTextureToArray(
         tex: *const textureReference,
         array: hipArray_const_t,
@@ -5624,7 +5925,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Get the offset of the alignment in a texture.\n\n @param [in] offset  Offset in bytes.\n @param [in] texref  Pointer of texture reference.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Get the offset of the alignment in a texture.\n\n @param [in] offset  Offset in bytes.\n @param [in] texref  Pointer of texture reference.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipGetTextureAlignmentOffset(
         offset: *mut usize,
         texref: *const textureReference,
@@ -5632,12 +5933,12 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Unbinds a texture.\n\n @param [in] tex  Texture to unbind.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Unbinds a texture.\n\n @param [in] tex  Texture to unbind.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipUnbindTexture(tex: *const textureReference) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets the the address for a texture reference.\n\n @param [out] dev_ptr  Pointer of device address.\n @param [in] texRef  Pointer of texture reference.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Gets the address for a texture reference.\n\n @param [out] dev_ptr  Pointer of device address.\n @param [in] texRef  Pointer of texture reference.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefGetAddress(
         dev_ptr: *mut hipDeviceptr_t,
         texRef: *const textureReference,
@@ -5645,7 +5946,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets the address mode for a texture reference.\n\n @param [out] pam  Pointer of address mode.\n @param [in] texRef  Pointer of texture reference.\n @param [in] dim  Dimension.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Gets the address mode for a texture reference.\n\n @param [out] pam  Pointer of address mode.\n @param [in] texRef  Pointer of texture reference.\n @param [in] dim  Dimension.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefGetAddressMode(
         pam: *mut hipTextureAddressMode,
         texRef: *const textureReference,
@@ -5654,7 +5955,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets filter mode for a texture reference.\n\n @param [out] pfm  Pointer of filter mode.\n @param [in] texRef  Pointer of texture reference.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Gets filter mode for a texture reference.\n\n @param [out] pfm  Pointer of filter mode.\n @param [in] texRef  Pointer of texture reference.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefGetFilterMode(
         pfm: *mut hipTextureFilterMode,
         texRef: *const textureReference,
@@ -5662,7 +5963,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets flags for a texture reference.\n\n @param [out] pFlags  Pointer of flags.\n @param [in] texRef  Pointer of texture reference.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Gets flags for a texture reference.\n\n @param [out] pFlags  Pointer of flags.\n @param [in] texRef  Pointer of texture reference.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefGetFlags(
         pFlags: *mut ::std::os::raw::c_uint,
         texRef: *const textureReference,
@@ -5670,7 +5971,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets texture format for a texture reference.\n\n @param [out] pFormat  Pointer of the format.\n @param [out] pNumChannels  Pointer of number of channels.\n @param [in] texRef  Pointer of texture reference.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Gets texture format for a texture reference.\n\n @param [out] pFormat  Pointer of the format.\n @param [out] pNumChannels  Pointer of number of channels.\n @param [in] texRef  Pointer of texture reference.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefGetFormat(
         pFormat: *mut hipArray_Format,
         pNumChannels: *mut ::std::os::raw::c_int,
@@ -5679,7 +5980,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets the maximum anisotropy for a texture reference.\n\n @param [out] pmaxAnsio  Pointer of the maximum anisotropy.\n @param [in] texRef  Pointer of texture reference.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Gets the maximum anisotropy for a texture reference.\n\n @param [out] pmaxAnsio  Pointer of the maximum anisotropy.\n @param [in] texRef  Pointer of texture reference.\n\n @returns #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefGetMaxAnisotropy(
         pmaxAnsio: *mut ::std::os::raw::c_int,
         texRef: *const textureReference,
@@ -5687,7 +5988,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets the mipmap filter mode for a texture reference.\n\n @param [out] pfm  Pointer of the mipmap filter mode.\n @param [in] texRef  Pointer of texture reference.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Gets the mipmap filter mode for a texture reference.\n\n @param [out] pfm  Pointer of the mipmap filter mode.\n @param [in] texRef  Pointer of texture reference.\n\n @returns #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefGetMipmapFilterMode(
         pfm: *mut hipTextureFilterMode,
         texRef: *const textureReference,
@@ -5695,7 +5996,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets the mipmap level bias for a texture reference.\n\n @param [out] pbias  Pointer of the mipmap level bias.\n @param [in] texRef  Pointer of texture reference.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Gets the mipmap level bias for a texture reference.\n\n @param [out] pbias  Pointer of the mipmap level bias.\n @param [in] texRef  Pointer of texture reference.\n\n @returns #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefGetMipmapLevelBias(
         pbias: *mut f32,
         texRef: *const textureReference,
@@ -5703,7 +6004,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets the minimum and maximum mipmap level clamps for a texture reference.\n\n @param [out] pminMipmapLevelClamp  Pointer of the minimum mipmap level clamp.\n @param [out] pmaxMipmapLevelClamp  Pointer of the maximum mipmap level clamp.\n @param [in] texRef  Pointer of texture reference.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Gets the minimum and maximum mipmap level clamps for a texture reference.\n\n @param [out] pminMipmapLevelClamp  Pointer of the minimum mipmap level clamp.\n @param [out] pmaxMipmapLevelClamp  Pointer of the maximum mipmap level clamp.\n @param [in] texRef  Pointer of texture reference.\n\n @returns #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefGetMipmapLevelClamp(
         pminMipmapLevelClamp: *mut f32,
         pmaxMipmapLevelClamp: *mut f32,
@@ -5712,7 +6013,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Gets the mipmapped array bound to a texture reference.\n\n @param [out] pArray  Pointer of the mipmapped array.\n @param [in] texRef  Pointer of texture reference.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Gets the mipmapped array bound to a texture reference.\n\n @param [out] pArray  Pointer of the mipmapped array.\n @param [in] texRef  Pointer of texture reference.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefGetMipMappedArray(
         pArray: *mut hipMipmappedArray_t,
         texRef: *const textureReference,
@@ -5720,7 +6021,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Sets an bound address for a texture reference.\n\n @param [out] ByteOffset  Pointer of the offset in bytes.\n @param [in] texRef  Pointer of texture reference.\n @param [in] dptr  Pointer of device address to bind.\n @param [in] bytes  Size in bytes.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Sets a bound address for a texture reference.\n\n @param [out] ByteOffset  Pointer of the offset in bytes.\n @param [in] texRef  Pointer of texture reference.\n @param [in] dptr  Pointer of device address to bind.\n @param [in] bytes  Size in bytes.\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefSetAddress(
         ByteOffset: *mut usize,
         texRef: *mut textureReference,
@@ -5730,7 +6031,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Set a bind an address as a 2D texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] desc  Pointer of array descriptor.\n @param [in] dptr  Pointer of device address to bind.\n @param [in] Pitch  Pitch in bytes.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Binds an address as a 2D texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] desc  Pointer of array descriptor.\n @param [in] dptr  Pointer of device address to bind.\n @param [in] Pitch  Pitch in bytes.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefSetAddress2D(
         texRef: *mut textureReference,
         desc: *const HIP_ARRAY_DESCRIPTOR,
@@ -5740,7 +6041,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Sets the maximum anisotropy for a texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [out] maxAniso  Value of the maximum anisotropy.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Sets the maximum anisotropy for a texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] maxAniso  Value of the maximum anisotropy.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefSetMaxAnisotropy(
         texRef: *mut textureReference,
         maxAniso: ::std::os::raw::c_uint,
@@ -5748,7 +6049,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Sets border color for a texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] pBorderColor  Pointer of border color.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Sets border color for a texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] pBorderColor  Pointer of border color.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefSetBorderColor(
         texRef: *mut textureReference,
         pBorderColor: *mut f32,
@@ -5756,7 +6057,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Sets mipmap filter mode for a texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] fm  Value of filter mode.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Sets mipmap filter mode for a texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] fm  Value of filter mode.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefSetMipmapFilterMode(
         texRef: *mut textureReference,
         fm: hipTextureFilterMode,
@@ -5764,12 +6065,12 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Sets mipmap level bias for a texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] bias  Value of mipmap bias.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Sets mipmap level bias for a texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] bias  Value of mipmap bias.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefSetMipmapLevelBias(texRef: *mut textureReference, bias: f32) -> hipError_t;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Sets mipmap level clamp for a texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] minMipMapLevelClamp  Value of minimum mipmap level clamp.\n @param [in] maxMipMapLevelClamp  Value of maximum mipmap level clamp.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Sets mipmap level clamp for a texture reference.\n\n @param [in] texRef  Pointer of texture reference.\n @param [in] minMipMapLevelClamp  Value of minimum mipmap level clamp.\n @param [in] maxMipMapLevelClamp  Value of maximum mipmap level clamp.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefSetMipmapLevelClamp(
         texRef: *mut textureReference,
         minMipMapLevelClamp: f32,
@@ -5778,7 +6079,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Binds mipmapped array to a texture reference.\n\n @param [in] texRef  Pointer of texture reference to bind.\n @param [in] mipmappedArray  Pointer of mipmapped array to bind.\n @param [in] Flags  Flags should be set as HIP_TRSA_OVERRIDE_FORMAT, as a valid value.\n\n @warning This API is deprecated.\n"]
+    #[doc = " @brief Binds mipmapped array to a texture reference.\n\n @param [in] texRef  Pointer of texture reference to bind.\n @param [in] mipmappedArray  Pointer of mipmapped array to bind.\n @param [in] Flags  Flags should be set as HIP_TRSA_OVERRIDE_FORMAT, as a valid value.\n\n @returns #hipSuccess, #hipErrorInvalidValue\n\n @warning This API is deprecated.\n"]
     pub fn hipTexRefSetMipmappedArray(
         texRef: *mut textureReference,
         mipmappedArray: *mut hipMipmappedArray,
@@ -5786,22 +6087,22 @@ extern "C" {
     ) -> hipError_t;
 }
 extern "C" {
-    #[doc = "  @defgroup Callback Callback Activity APIs\n  @{\n  This section describes the callback/Activity of HIP runtime API.\n/\n/**\n @brief Returns HIP API name by ID.\n\n @param [in] id ID of HIP API\n\n @returns hipSuccess, hipErrorInvalidValue\n"]
+    #[doc = "  @defgroup Callback Callback Activity APIs\n  @{\n  This section describes the callback/Activity of HIP runtime API.\n/\n/**\n @brief Returns HIP API name by ID.\n\n @param [in] id ID of HIP API\n\n @returns #hipSuccess, #hipErrorInvalidValue\n"]
     pub fn hipApiName(id: u32) -> *const ::std::os::raw::c_char;
 }
 extern "C" {
-    #[doc = " @brief Returns kernel name reference by function name.\n\n @param [in] f name of function\n\n @returns hipSuccess, hipErrorInvalidValue\n"]
+    #[doc = " @brief Returns kernel name reference by function name.\n\n @param [in] f Name of function\n\n @returns #hipSuccess, #hipErrorInvalidValue\n"]
     pub fn hipKernelNameRef(f: hipFunction_t) -> *const ::std::os::raw::c_char;
 }
 extern "C" {
-    #[doc = " @brief Retrives kernel for a given host pointer, unless stated otherwise.\n\n @param [in] hostFunction Pointer of host function.\n @param [in] stream stream the kernel is executed on.\n\n @returns hipSuccess, hipErrorInvalidValue\n"]
+    #[doc = " @brief Retrieves kernel for a given host pointer, unless stated otherwise.\n\n @param [in] hostFunction Pointer of host function.\n @param [in] stream Stream the kernel is executed on.\n\n @returns #hipSuccess, #hipErrorInvalidValue\n"]
     pub fn hipKernelNameRefByPtr(
         hostFunction: *const ::std::os::raw::c_void,
         stream: hipStream_t,
     ) -> *const ::std::os::raw::c_char;
 }
 extern "C" {
-    #[doc = " @brief Returns device ID on the stream.\n\n @param [in] stream stream of device executed on.\n\n @returns hipSuccess, hipErrorInvalidValue\n"]
+    #[doc = " @brief Returns device ID on the stream.\n\n @param [in] stream Stream of device executed on.\n\n @returns #hipSuccess, #hipErrorInvalidValue\n"]
     pub fn hipGetStreamDeviceId(stream: hipStream_t) -> ::std::os::raw::c_int;
 }
 extern "C" {
@@ -5845,7 +6146,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Update the set of dependencies in a capturing stream\n\n @param [in] stream - Stream under capture.\n @param [in] dependencies - pointer to an array of nodes to Add/Replace.\n @param [in] numDependencies - size of the array in dependencies.\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorIllegalState\n\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n"]
+    #[doc = " @brief Update the set of dependencies in a capturing stream\n\n @param [in] stream  Stream under capture.\n @param [in] dependencies  pointer to an array of nodes to Add/Replace.\n @param [in] numDependencies  size of the array in dependencies.\n @param [in] flags  Flag specifying how to update the dependency set. Should be one of the values in enum\n #hipStreamUpdateCaptureDependenciesFlags\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorIllegalState\n\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n"]
     pub fn hipStreamUpdateCaptureDependencies(
         stream: hipStream_t,
         dependencies: *mut hipGraphNode_t,
@@ -6039,6 +6340,18 @@ extern "C" {
         pNodeParams: *const hipKernelNodeParams,
     ) -> hipError_t;
 }
+extern "C" {
+    #[must_use]
+    #[doc = " @brief Creates a memcpy node and adds it to a graph.\n\n @param [out] phGraphNode - pointer to graph node to create.\n @param [in] hGraph - instance of graph to add the created node.\n @param [in] dependencies - const pointer to the dependencies on the memcpy execution node.\n @param [in] numDependencies - the number of the dependencies.\n @param [in] copyParams - const pointer to the parameters for the memory copy.\n @param [in] ctx - context related to current device.\n @returns #hipSuccess, #hipErrorInvalidValue\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues."]
+    pub fn hipDrvGraphAddMemcpyNode(
+        phGraphNode: *mut hipGraphNode_t,
+        hGraph: hipGraph_t,
+        dependencies: *const hipGraphNode_t,
+        numDependencies: usize,
+        copyParams: *const HIP_MEMCPY3D,
+        ctx: hipCtx_t,
+    ) -> hipError_t;
+}
 extern "C" {
     #[must_use]
     #[doc = " @brief Creates a memcpy node and adds it to a graph.\n\n @param [out] pGraphNode - pointer to graph node to create.\n @param [in] graph - instance of graph to add the created node.\n @param [in] pDependencies - const pointer to the dependencies on the memcpy execution node.\n @param [in] numDependencies - the number of the dependencies.\n @param [in] pCopyParams - const pointer to the parameters for the memory copy.\n @returns #hipSuccess, #hipErrorInvalidValue\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues."]
@@ -6444,7 +6757,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @brief Free unused memory on specific device used for graph back to OS.\n\n @param [in] device - device the memory is used for graphs\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues."]
+    #[doc = " @brief Free unused memory on specific device used for graph back to OS.\n\n @param [in] device - device the memory is used for graphs\n @returns #hipSuccess, #hipErrorInvalidDevice\n\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues."]
     pub fn hipDeviceGraphMemTrim(device: ::std::os::raw::c_int) -> hipError_t;
 }
 extern "C" {
@@ -6571,7 +6884,7 @@ extern "C" {
     #[doc = " @brief Returns external semaphore signal node params.\n\n @param [in]   hNode       - Node from the graph from which graphExec was instantiated.\n @param [out]  params_out  - Pointer to params.\n @returns #hipSuccess, #hipErrorInvalidValue\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues."]
     pub fn hipGraphExternalSemaphoresSignalNodeGetParams(
         hNode: hipGraphNode_t,
-        params_out: *const hipExternalSemaphoreSignalNodeParams,
+        params_out: *mut hipExternalSemaphoreSignalNodeParams,
     ) -> hipError_t;
 }
 extern "C" {
@@ -6579,7 +6892,7 @@ extern "C" {
     #[doc = " @brief Returns external semaphore wait node params.\n\n @param [in]   hNode       - Node from the graph from which graphExec was instantiated.\n @param [out]  params_out  - Pointer to params.\n @returns #hipSuccess, #hipErrorInvalidValue\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues."]
     pub fn hipGraphExternalSemaphoresWaitNodeGetParams(
         hNode: hipGraphNode_t,
-        params_out: *const hipExternalSemaphoreWaitNodeParams,
+        params_out: *mut hipExternalSemaphoreWaitNodeParams,
     ) -> hipError_t;
 }
 extern "C" {
@@ -6600,6 +6913,18 @@ extern "C" {
         nodeParams: *const hipExternalSemaphoreWaitNodeParams,
     ) -> hipError_t;
 }
+extern "C" {
+    #[must_use]
+    #[doc = " @brief Creates a memset node and adds it to a graph.\n\n @param [out] phGraphNode - pointer to graph node to create.\n @param [in] hGraph - instance of graph to add the created node to.\n @param [in] dependencies - const pointer to the dependencies on the memset execution node.\n @param [in] numDependencies - number of the dependencies.\n @param [in] memsetParams - const pointer to the parameters for the memory set.\n @param [in] ctx - context related to current device.\n @returns #hipSuccess, #hipErrorInvalidValue\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues."]
+    pub fn hipDrvGraphAddMemsetNode(
+        phGraphNode: *mut hipGraphNode_t,
+        hGraph: hipGraph_t,
+        dependencies: *const hipGraphNode_t,
+        numDependencies: usize,
+        memsetParams: *const HIP_MEMSET_NODE_PARAMS,
+        ctx: hipCtx_t,
+    ) -> hipError_t;
+}
 extern "C" {
     #[must_use]
     #[doc = " @brief Frees an address range reservation made via hipMemAddressReserve\n\n @param [in] devPtr - starting address of the range.\n @param [in] size - size of the range.\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @note  This API is implemented on Linux, under development on Windows."]
@@ -6719,13 +7044,29 @@ extern "C" {
     #[doc = " @brief Unmap memory allocation of a given address range.\n\n @param [in] ptr - starting address of the range to unmap.\n @param [in] size - size of the virtual address range.\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n @warning : This API is marked as beta, meaning, while this is feature complete,\n it is still open to changes and may have outstanding issues.\n\n @note  This API is implemented on Linux, under development on Windows."]
     pub fn hipMemUnmap(ptr: *mut ::std::os::raw::c_void, size: usize) -> hipError_t;
 }
-#[doc = "-------------------------------------------------------------------------------------------------\n-------------------------------------------------------------------------------------------------\n  @defgroup GL OpenGL Interop\n  @{\n  This section describes the OpenGL and graphics interoperability functions of HIP runtime API.\n/\n/** GLuint as uint."]
+impl hipGLDeviceList {
+    #[doc = "< All hip devices used by current OpenGL context."]
+    pub const hipGLDeviceListAll: hipGLDeviceList = hipGLDeviceList(1);
+}
+impl hipGLDeviceList {
+    #[doc = "< Hip devices used by current OpenGL context in current\n< frame"]
+    pub const hipGLDeviceListCurrentFrame: hipGLDeviceList = hipGLDeviceList(2);
+}
+impl hipGLDeviceList {
+    #[doc = "< Hip devices used by current OpenGL context in next\n< frame."]
+    pub const hipGLDeviceListNextFrame: hipGLDeviceList = hipGLDeviceList(3);
+}
+#[repr(transparent)]
+#[doc = " HIP Devices used by current OpenGL Context."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct hipGLDeviceList(pub ::std::os::raw::c_int);
+#[doc = " GLuint as uint."]
 pub type GLuint = ::std::os::raw::c_uint;
 #[doc = " GLenum as uint."]
 pub type GLenum = ::std::os::raw::c_uint;
 extern "C" {
     #[must_use]
-    #[doc = " @brief Queries devices associated with the current OpenGL context.\n\n @param [out] pHipDeviceCount - Pointer of number of devices on the current GL context.\n @param [out] pHipDevices - Pointer of devices on the current OpenGL context.\n @param [in] hipDeviceCount - Size of device.\n @param [in] deviceList - The setting of devices. It could be either hipGLDeviceListCurrentFrame\n for the devices used to render the current frame, or hipGLDeviceListAll for all devices.\n The default setting is Invalid deviceList value.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n"]
+    #[doc = "  @ingroup GL\n  @{\n\n/\n/**\n @brief Queries devices associated with the current OpenGL context.\n\n @param [out] pHipDeviceCount - Pointer of number of devices on the current GL context.\n @param [out] pHipDevices - Pointer of devices on the current OpenGL context.\n @param [in] hipDeviceCount - Size of device.\n @param [in] deviceList - The setting of devices. It could be either hipGLDeviceListCurrentFrame\n for the devices used to render the current frame, or hipGLDeviceListAll for all devices.\n The default setting is Invalid deviceList value.\n\n @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported\n"]
     pub fn hipGLGetDevices(
         pHipDeviceCount: *mut ::std::os::raw::c_uint,
         pHipDevices: *mut ::std::os::raw::c_int,
@@ -6983,7 +7324,7 @@ extern "C" {
 extern "C" {
     #[must_use]
     pub fn hipMemcpy2DToArray_spt(
-        dst: *mut hipArray,
+        dst: hipArray_t,
         wOffset: usize,
         hOffset: usize,
         src: *const ::std::os::raw::c_void,
@@ -7010,7 +7351,7 @@ extern "C" {
 extern "C" {
     #[must_use]
     pub fn hipMemcpy2DToArrayAsync_spt(
-        dst: *mut hipArray,
+        dst: hipArray_t,
         wOffset: usize,
         hOffset: usize,
         src: *const ::std::os::raw::c_void,
diff --git a/hipfft-sys/README b/hipfft-sys/README
index 7326df4..8df86af 100644
--- a/hipfft-sys/README
+++ b/hipfft-sys/README
@@ -1 +1 @@
-bindgen /opt/rocm/include/hipfft/hipfft.h -o src/hipfft.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "hipfft.*" --must-use-type hipfftResult_t -- -I/opt/rocm/include -D__HIP_PLATFORM_AMD__
\ No newline at end of file
+bindgen $Env:HIP_PATH/include/hipfft/hipfft.h -o src/hipfft.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "hipfft.*" --must-use-type hipfftResult_t -- -I"$Env:HIP_PATH/include" -D__HIP_PLATFORM_AMD__
\ No newline at end of file
diff --git a/hipfft-sys/src/hipfft.rs b/hipfft-sys/src/hipfft.rs
index bac893e..5add549 100644
--- a/hipfft-sys/src/hipfft.rs
+++ b/hipfft-sys/src/hipfft.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.66.1 */
+/* automatically generated by rust-bindgen 0.69.4 */
 
 #[repr(C)]
 #[derive(Copy, Clone)]
@@ -89,7 +89,7 @@ impl hipfftResult_t {
 #[doc = " @brief Result/status/error codes"]
 #[must_use]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct hipfftResult_t(pub ::std::os::raw::c_uint);
+pub struct hipfftResult_t(pub ::std::os::raw::c_int);
 #[doc = " @brief Result/status/error codes"]
 pub use self::hipfftResult_t as hipfftResult;
 impl hipfftType_t {
@@ -119,7 +119,7 @@ impl hipfftType_t {
 #[repr(transparent)]
 #[doc = " @brief Transform type\n  @details This type is used to declare the Fourier transform type that will be executed."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct hipfftType_t(pub ::std::os::raw::c_uint);
+pub struct hipfftType_t(pub ::std::os::raw::c_int);
 #[doc = " @brief Transform type\n  @details This type is used to declare the Fourier transform type that will be executed."]
 pub use self::hipfftType_t as hipfftType;
 impl hipfftLibraryPropertyType_t {
@@ -133,7 +133,7 @@ impl hipfftLibraryPropertyType_t {
 }
 #[repr(transparent)]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct hipfftLibraryPropertyType_t(pub ::std::os::raw::c_uint);
+pub struct hipfftLibraryPropertyType_t(pub ::std::os::raw::c_int);
 pub use self::hipfftLibraryPropertyType_t as hipfftLibraryPropertyType;
 #[repr(C)]
 #[derive(Copy, Clone)]
@@ -198,7 +198,7 @@ extern "C" {
     pub fn hipfftExtPlanScaleFactor(plan: hipfftHandle, scalefactor: f64) -> hipfftResult;
 }
 extern "C" {
-    #[doc = " @brief Initialize a new one-dimensional FFT plan.\n\n  @details Assumes that the plan has been created already, and\n  modifies the plan associated with the plan handle.\n\n  @param[in] plan Handle of the FFT plan.\n  @param[in] nx FFT length.\n  @param[in] type FFT type.\n  @param[in] batch Number of batched transforms to compute."]
+    #[doc = " @brief Initialize a new one-dimensional FFT plan.\n\n  @details Assumes that the plan has been created already, and\n  modifies the plan associated with the plan handle.\n\n  @param[in] plan Handle of the FFT plan.\n  @param[in] nx FFT length.\n  @param[in] type FFT type.\n  @param[in] batch Number of batched transforms to compute.\n  @param[out] workSize Pointer to work area size (returned value)."]
     pub fn hipfftMakePlan1d(
         plan: hipfftHandle,
         nx: ::std::os::raw::c_int,
@@ -262,7 +262,7 @@ extern "C" {
     ) -> hipfftResult;
 }
 extern "C" {
-    #[doc = " @brief Return an estimate of the work area size required for a 1D plan.\n\n  @param[in] nx Number of elements in the x-direction.\n  @param[in] type FFT type.\n  @param[out] workSize Pointer to work area size (returned value)."]
+    #[doc = " @brief Return an estimate of the work area size required for a 1D plan.\n\n  @param[in] nx Number of elements in the x-direction.\n  @param[in] type FFT type.\n  @param[in] batch Number of batched transforms to perform.\n  @param[out] workSize Pointer to work area size (returned value)."]
     pub fn hipfftEstimate1d(
         nx: ::std::os::raw::c_int,
         type_: hipfftType,
@@ -306,7 +306,7 @@ extern "C" {
     ) -> hipfftResult;
 }
 extern "C" {
-    #[doc = " @brief Return size of the work area size required for a 1D plan.\n\n  @param[in] plan Pointer to the FFT plan.\n  @param[in] nx Number of elements in the x-direction.\n  @param[in] type FFT type.\n  @param[out] workSize Pointer to work area size (returned value)."]
+    #[doc = " @brief Return size of the work area size required for a 1D plan.\n\n  @param[in] plan Pointer to the FFT plan.\n  @param[in] nx Number of elements in the x-direction.\n  @param[in] type FFT type.\n  @param[in] batch Number of batched transforms to perform.\n  @param[out] workSize Pointer to work area size (returned value)."]
     pub fn hipfftGetSize1d(
         plan: hipfftHandle,
         nx: ::std::os::raw::c_int,
@@ -370,7 +370,7 @@ extern "C" {
     ) -> hipfftResult;
 }
 extern "C" {
-    #[doc = " @brief Return size of the work area size required for a rank-dimensional plan.\n\n  @param[in] plan Pointer to the FFT plan."]
+    #[doc = " @brief Return size of the work area size required for a rank-dimensional plan.\n\n  @param[in] plan Pointer to the FFT plan.\n  @param[out] workSize Pointer to work area size (returned value)."]
     pub fn hipfftGetSize(plan: hipfftHandle, workSize: *mut usize) -> hipfftResult;
 }
 extern "C" {
diff --git a/hiprtc-sys/README b/hiprtc-sys/README
index 671719c..45317ee 100644
--- a/hiprtc-sys/README
+++ b/hiprtc-sys/README
@@ -1 +1 @@
-bindgen $Env:HIP_PATH/include/hip/hiprtc.h -o src/hiprtc.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "hiprtc.*" --must-use-type hiprtcResult_t -- -I$Env:HIP_PATH/include -D__HIP_PLATFORM_AMD__
\ No newline at end of file
+bindgen $Env:HIP_PATH/include/hip/hiprtc.h -o src/hiprtc.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "hiprtc.*" --must-use-type hiprtcResult_t -- -I"$Env:HIP_PATH/include" -D__HIP_PLATFORM_AMD__
\ No newline at end of file
diff --git a/hiprtc-sys/src/hiprtc.rs b/hiprtc-sys/src/hiprtc.rs
index a24fef4..66baf4a 100644
--- a/hiprtc-sys/src/hiprtc.rs
+++ b/hiprtc-sys/src/hiprtc.rs
@@ -57,39 +57,39 @@ impl hiprtcResult {
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
 pub struct hiprtcResult(pub ::std::os::raw::c_int);
 impl hiprtcJIT_option {
-    #[doc = "< Maximum registers"]
+    #[doc = "< Maximum registers may be used in a thread, passed to compiler"]
     pub const HIPRTC_JIT_MAX_REGISTERS: hiprtcJIT_option = hiprtcJIT_option(0);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Thread per block"]
+    #[doc = "< Number of thread per block"]
     pub const HIPRTC_JIT_THREADS_PER_BLOCK: hiprtcJIT_option = hiprtcJIT_option(1);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Time from aall clock"]
+    #[doc = "< Value for total wall clock time"]
     pub const HIPRTC_JIT_WALL_TIME: hiprtcJIT_option = hiprtcJIT_option(2);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Log buffer info"]
+    #[doc = "< Pointer to the buffer with logged information"]
     pub const HIPRTC_JIT_INFO_LOG_BUFFER: hiprtcJIT_option = hiprtcJIT_option(3);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Log buffer size in bytes"]
+    #[doc = "< Size of the buffer in bytes for logged info"]
     pub const HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES: hiprtcJIT_option = hiprtcJIT_option(4);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Log buffer error"]
+    #[doc = "< Pointer to the buffer with logged error(s)"]
     pub const HIPRTC_JIT_ERROR_LOG_BUFFER: hiprtcJIT_option = hiprtcJIT_option(5);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Log buffer size in bytes"]
+    #[doc = "< Size of the buffer in bytes for logged error(s)"]
     pub const HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: hiprtcJIT_option = hiprtcJIT_option(6);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Optimization level"]
+    #[doc = "< Value of optimization level for generated codes"]
     pub const HIPRTC_JIT_OPTIMIZATION_LEVEL: hiprtcJIT_option = hiprtcJIT_option(7);
 }
 impl hiprtcJIT_option {
-    #[doc = "<"]
+    #[doc = "< The target context, which is the default"]
     pub const HIPRTC_JIT_TARGET_FROM_HIPCONTEXT: hiprtcJIT_option = hiprtcJIT_option(8);
 }
 impl hiprtcJIT_option {
@@ -105,55 +105,55 @@ impl hiprtcJIT_option {
     pub const HIPRTC_JIT_GENERATE_DEBUG_INFO: hiprtcJIT_option = hiprtcJIT_option(11);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Log verbose"]
+    #[doc = "< Generate log verbose"]
     pub const HIPRTC_JIT_LOG_VERBOSE: hiprtcJIT_option = hiprtcJIT_option(12);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Generate line information"]
+    #[doc = "< Generate line number information"]
     pub const HIPRTC_JIT_GENERATE_LINE_INFO: hiprtcJIT_option = hiprtcJIT_option(13);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Cache mode"]
+    #[doc = "< Set cache mode"]
     pub const HIPRTC_JIT_CACHE_MODE: hiprtcJIT_option = hiprtcJIT_option(14);
 }
 impl hiprtcJIT_option {
-    #[doc = "< New SM3X option"]
+    #[doc = "< @deprecated  New SM3X option."]
     pub const HIPRTC_JIT_NEW_SM3X_OPT: hiprtcJIT_option = hiprtcJIT_option(15);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Fast compile"]
+    #[doc = "< Set fast compile"]
     pub const HIPRTC_JIT_FAST_COMPILE: hiprtcJIT_option = hiprtcJIT_option(16);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Global symbol names"]
+    #[doc = "< Array of device symbol names to be relocated to the host"]
     pub const HIPRTC_JIT_GLOBAL_SYMBOL_NAMES: hiprtcJIT_option = hiprtcJIT_option(17);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Global symbol address"]
+    #[doc = "< Array of host addresses to be relocated to the device"]
     pub const HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS: hiprtcJIT_option = hiprtcJIT_option(18);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Global symbol count"]
+    #[doc = "< Number of symbol count."]
     pub const HIPRTC_JIT_GLOBAL_SYMBOL_COUNT: hiprtcJIT_option = hiprtcJIT_option(19);
 }
 impl hiprtcJIT_option {
-    #[doc = "< LTO"]
+    #[doc = "< @deprecated  Enable link-time optimization for device code"]
     pub const HIPRTC_JIT_LTO: hiprtcJIT_option = hiprtcJIT_option(20);
 }
 impl hiprtcJIT_option {
-    #[doc = "< FTZ"]
+    #[doc = "< @deprecated  Set single-precision denormals."]
     pub const HIPRTC_JIT_FTZ: hiprtcJIT_option = hiprtcJIT_option(21);
 }
 impl hiprtcJIT_option {
-    #[doc = "< Prec_VIV"]
+    #[doc = "< @deprecated  Set single-precision floating-point division and\n< reciprocals"]
     pub const HIPRTC_JIT_PREC_DIV: hiprtcJIT_option = hiprtcJIT_option(22);
 }
 impl hiprtcJIT_option {
-    #[doc = "< PREC_SQRT"]
+    #[doc = "< @deprecated  Set single-precision floating-point square root"]
     pub const HIPRTC_JIT_PREC_SQRT: hiprtcJIT_option = hiprtcJIT_option(23);
 }
 impl hiprtcJIT_option {
-    #[doc = "< FMA"]
+    #[doc = "< @deprecated  Enable floating-point multiplies and adds/subtracts operations"]
     pub const HIPRTC_JIT_FMA: hiprtcJIT_option = hiprtcJIT_option(24);
 }
 impl hiprtcJIT_option {
@@ -161,11 +161,11 @@ impl hiprtcJIT_option {
     pub const HIPRTC_JIT_NUM_OPTIONS: hiprtcJIT_option = hiprtcJIT_option(25);
 }
 impl hiprtcJIT_option {
-    #[doc = "< AMD only. Linker options to be passed on to"]
+    #[doc = "< Linker options to be passed on to compiler"]
     pub const HIPRTC_JIT_IR_TO_ISA_OPT_EXT: hiprtcJIT_option = hiprtcJIT_option(10000);
 }
 impl hiprtcJIT_option {
-    #[doc = "< AMD only. Count of linker options"]
+    #[doc = "< Count of linker options to be passed on to\n< compiler  @note  Only supported for the AMD platform"]
     pub const HIPRTC_JIT_IR_TO_ISA_OPT_COUNT_EXT: hiprtcJIT_option = hiprtcJIT_option(10001);
 }
 #[repr(transparent)]
@@ -201,7 +201,7 @@ impl hiprtcJITInputType {
     pub const HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES: hiprtcJITInputType = hiprtcJITInputType(6);
 }
 impl hiprtcJITInputType {
-    #[doc = "< LLVM bitcode"]
+    #[doc = "< LLVM bitcode or IR assembly"]
     pub const HIPRTC_JIT_INPUT_LLVM_BITCODE: hiprtcJITInputType = hiprtcJITInputType(100);
 }
 impl hiprtcJITInputType {
diff --git a/ptx/src/emit.rs b/ptx/src/emit.rs
index 1c88cd0..215745f 100644
--- a/ptx/src/emit.rs
+++ b/ptx/src/emit.rs
@@ -1730,12 +1730,12 @@ fn emit_inst_sqrt(
         (ast::ScalarType::F64, ast::RcpSqrtKind::Approx) => {
             (&b"llvm.sqrt.f64\0"[..], FastMathFlags::ApproxFunc)
         }
-        // TODO: Go back to llvm.sqrt.f64 when this commit lands:
-        //       https://github.com/RadeonOpenCompute/llvm-project/commit/e3fd8f83a801b1918508c7c0a71cc31bc95ad4d2
-        //       It's not yet present as of ROCm 5.7.1
-        // TODO: support correct rounding
-        (ast::ScalarType::F32, _) => (&b"__ocml_sqrt_f32\0"[..], FastMathFlags::empty()),
-        (ast::ScalarType::F64, _) => (&b"__ocml_sqrt_f64\0"[..], FastMathFlags::empty()),
+        (ast::ScalarType::F32, _) => {
+            (&b"llvm.sqrt.f32\0"[..], FastMathFlags::empty())
+        },
+        (ast::ScalarType::F64, _) => {
+            (&b"llvm.sqrt.f64\0"[..], FastMathFlags::empty())
+        },
         _ => return Err(TranslateError::unreachable()),
     };
     let sqrt_result = emit_intrinsic_arg2(
diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs
index 71dbd06..7c8f66b 100644
--- a/ptx/src/test/spirv_run/mod.rs
+++ b/ptx/src/test/spirv_run/mod.rs
@@ -671,7 +671,7 @@ fn run_hip<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Def
         let mut stream = ptr::null_mut();
         hip_call! { hipStreamCreateWithFlags(&mut stream, hipStreamNonBlocking) };
         let mut dev_props = unsafe { mem::zeroed() };
-        hip_call! { hipGetDeviceProperties(&mut dev_props, dev) };
+        hip_call! { hipGetDevicePropertiesR0600(&mut dev_props, dev) };
         let mut module = ptr::null_mut();
         hip_call! { hipModuleLoadData(&mut module, compiled.as_ptr() as _) };
         let mut kernel = ptr::null_mut();
diff --git a/rocblas-sys/README b/rocblas-sys/README
index e6e0567..7fe68fc 100644
--- a/rocblas-sys/README
+++ b/rocblas-sys/README
@@ -1 +1 @@
-bindgen /opt/rocm/include/rocblas/rocblas.h -o src/rocblas.rs --no-layout-tests --size_t-is-usize --default-enum-style=newtype --no-derive-debug --allowlist-function "rocblas_.*" --allowlist-var "ROCBLAS_*" --must-use-type rocblas_status -- -I/opt/rocm/include
\ No newline at end of file
+bindgen $Env:HIP_PATH/include/rocblas/rocblas.h -o src/rocblas.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "rocblas_.*" --allowlist-var "ROCBLAS_.*" --must-use-type rocblas_status -- -I"$Env:HIP_PATH/include"
\ No newline at end of file
diff --git a/rocblas-sys/src/rocblas.rs b/rocblas-sys/src/rocblas.rs
index 6fba5d2..6b5c009 100644
--- a/rocblas-sys/src/rocblas.rs
+++ b/rocblas-sys/src/rocblas.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.64.0 */
+/* automatically generated by rust-bindgen 0.69.4 */
 
 #[doc = " \\brief Struct to represent a 16 bit Brain floating-point number."]
 #[repr(C)]
@@ -34,6 +34,7 @@ pub struct rocblas_device_malloc_base {
     _unused: [u8; 0],
 }
 pub type rocblas_int = i32;
+#[doc = " \\brief Stride between matrices or vectors in strided_batched functions"]
 pub type rocblas_stride = i64;
 #[doc = " \\brief Structure definition for rocblas_half"]
 #[repr(C)]
@@ -69,7 +70,7 @@ impl rocblas_operation_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to specify whether the matrix is to be transposed or not."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_operation_(pub ::std::os::raw::c_uint);
+pub struct rocblas_operation_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to specify whether the matrix is to be transposed or not."]
 pub use self::rocblas_operation_ as rocblas_operation;
 impl rocblas_fill_ {
@@ -86,7 +87,7 @@ impl rocblas_fill_ {
 #[repr(transparent)]
 #[doc = " \\brief Used by the Hermitian, symmetric and triangular matrix\n routines to specify whether the upper, or lower triangle is being referenced."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_fill_(pub ::std::os::raw::c_uint);
+pub struct rocblas_fill_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used by the Hermitian, symmetric and triangular matrix\n routines to specify whether the upper, or lower triangle is being referenced."]
 pub use self::rocblas_fill_ as rocblas_fill;
 impl rocblas_diagonal_ {
@@ -100,7 +101,7 @@ impl rocblas_diagonal_ {
 #[repr(transparent)]
 #[doc = " \\brief It is used by the triangular matrix routines to specify whether the\n matrix is unit triangular."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_diagonal_(pub ::std::os::raw::c_uint);
+pub struct rocblas_diagonal_(pub ::std::os::raw::c_int);
 #[doc = " \\brief It is used by the triangular matrix routines to specify whether the\n matrix is unit triangular."]
 pub use self::rocblas_diagonal_ as rocblas_diagonal;
 impl rocblas_side_ {
@@ -117,7 +118,7 @@ impl rocblas_side_ {
 #[repr(transparent)]
 #[doc = " \\brief Indicates the side matrix A is located relative to matrix B during multiplication."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_side_(pub ::std::os::raw::c_uint);
+pub struct rocblas_side_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Indicates the side matrix A is located relative to matrix B during multiplication."]
 pub use self::rocblas_side_ as rocblas_side;
 impl rocblas_datatype_ {
@@ -184,6 +185,14 @@ impl rocblas_datatype_ {
     #[doc = "< 16-bit bfloat, complex"]
     pub const rocblas_datatype_bf16_c: rocblas_datatype_ = rocblas_datatype_(169);
 }
+impl rocblas_datatype_ {
+    #[doc = "< 8 bit floating point, real"]
+    pub const rocblas_datatype_f8_r: rocblas_datatype_ = rocblas_datatype_(170);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 8 bit bfloat, real"]
+    pub const rocblas_datatype_bf8_r: rocblas_datatype_ = rocblas_datatype_(171);
+}
 impl rocblas_datatype_ {
     #[doc = "< Invalid datatype value, do not use"]
     pub const rocblas_datatype_invalid: rocblas_datatype_ = rocblas_datatype_(255);
@@ -191,7 +200,7 @@ impl rocblas_datatype_ {
 #[repr(transparent)]
 #[doc = " \\brief Indicates the precision width of data stored in a blas type."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_datatype_(pub ::std::os::raw::c_uint);
+pub struct rocblas_datatype_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Indicates the precision width of data stored in a blas type."]
 pub use self::rocblas_datatype_ as rocblas_datatype;
 impl rocblas_status_ {
@@ -249,10 +258,16 @@ impl rocblas_status_ {
 impl rocblas_status_ {
     pub const rocblas_status_check_numerics_fail: rocblas_status_ = rocblas_status_(13);
 }
+impl rocblas_status_ {
+    pub const rocblas_status_excluded_from_build: rocblas_status_ = rocblas_status_(14);
+}
+impl rocblas_status_ {
+    pub const rocblas_status_arch_mismatch: rocblas_status_ = rocblas_status_(15);
+}
 #[repr(transparent)]
 #[doc = "   @brief rocblas status codes definition"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_status_(pub ::std::os::raw::c_uint);
+pub struct rocblas_status_(pub ::std::os::raw::c_int);
 #[doc = "   @brief rocblas status codes definition"]
 pub use self::rocblas_status_ as rocblas_status;
 impl rocblas_pointer_mode_ {
@@ -266,7 +281,7 @@ impl rocblas_pointer_mode_ {
 #[repr(transparent)]
 #[doc = " \\brief Indicates if scalar pointers are on host or device. This is used for\n    scalars alpha and beta and for scalar function return values."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_pointer_mode_(pub ::std::os::raw::c_uint);
+pub struct rocblas_pointer_mode_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Indicates if scalar pointers are on host or device. This is used for\n    scalars alpha and beta and for scalar function return values."]
 pub use self::rocblas_pointer_mode_ as rocblas_pointer_mode;
 impl rocblas_atomics_mode_ {
@@ -278,10 +293,10 @@ impl rocblas_atomics_mode_ {
     pub const rocblas_atomics_allowed: rocblas_atomics_mode_ = rocblas_atomics_mode_(1);
 }
 #[repr(transparent)]
-#[doc = " \\brief Indicates if atomics operations are allowed. Not allowing atomic operations\n    may generally improve determinism and repeatability of results at a cost of performance"]
+#[doc = " \\brief Indicates if atomics operations are allowed. Not allowing atomic operations\n    may generally improve determinism and repeatability of results at a cost of performance.\n    Defaults to rocblas_atomics_allowed."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_atomics_mode_(pub ::std::os::raw::c_uint);
-#[doc = " \\brief Indicates if atomics operations are allowed. Not allowing atomic operations\n    may generally improve determinism and repeatability of results at a cost of performance"]
+pub struct rocblas_atomics_mode_(pub ::std::os::raw::c_int);
+#[doc = " \\brief Indicates if atomics operations are allowed. Not allowing atomic operations\n    may generally improve determinism and repeatability of results at a cost of performance.\n    Defaults to rocblas_atomics_allowed."]
 pub use self::rocblas_atomics_mode_ as rocblas_atomics_mode;
 impl rocblas_performance_metric_ {
     #[doc = " \\brief Use Tensile's default performance metric for solution selection"]
@@ -301,56 +316,45 @@ impl rocblas_performance_metric_ {
 #[repr(transparent)]
 #[doc = " \\brief Indicates which performance metric Tensile uses when selecting the optimal\n    solution for gemm problems."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_performance_metric_(pub ::std::os::raw::c_uint);
+pub struct rocblas_performance_metric_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Indicates which performance metric Tensile uses when selecting the optimal\n    solution for gemm problems."]
 pub use self::rocblas_performance_metric_ as rocblas_performance_metric;
 impl rocblas_gemm_algo_ {
     pub const rocblas_gemm_algo_standard: rocblas_gemm_algo_ = rocblas_gemm_algo_(0);
 }
+impl rocblas_gemm_algo_ {
+    pub const rocblas_gemm_algo_solution_index: rocblas_gemm_algo_ = rocblas_gemm_algo_(1);
+}
 #[repr(transparent)]
 #[doc = " \\brief Indicates if layer is active with bitmask"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_gemm_algo_(pub ::std::os::raw::c_uint);
+pub struct rocblas_gemm_algo_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Indicates if layer is active with bitmask"]
 pub use self::rocblas_gemm_algo_ as rocblas_gemm_algo;
-impl rocblas_gemm_flags_ {
-    #[doc = " \\brief Default empty flags"]
-    pub const rocblas_gemm_flags_none: rocblas_gemm_flags_ = rocblas_gemm_flags_(0);
+impl rocblas_geam_ex_operation_ {
+    pub const rocblas_geam_ex_operation_min_plus: rocblas_geam_ex_operation_ =
+        rocblas_geam_ex_operation_(0);
 }
-impl rocblas_gemm_flags_ {
-    #[doc = " \\brief Before ROCm 4.2, this flags is not implemented and rocblas uses packed-Int8x4 by default.\n After ROCm 4.2, set flag is neccesary if we want packed-Int8x4. Default (0x0) uses unpacked."]
-    pub const rocblas_gemm_flags_pack_int8x4: rocblas_gemm_flags_ = rocblas_gemm_flags_(1);
-}
-impl rocblas_gemm_flags_ {
-    #[doc = " \\brief Select the gemm problem with the highest efficiency per compute unit used. Useful for running multiple smaller problems\n simultaneously. This takes precedence over the performance metric set in rocblas_handle and currently only works for\n gemm_*_ex problems."]
-    pub const rocblas_gemm_flags_use_cu_efficiency: rocblas_gemm_flags_ = rocblas_gemm_flags_(2);
-}
-impl rocblas_gemm_flags_ {
-    #[doc = " \\brief Select an alternate implementation for the MI200 FP16 HPA\n (High Precision Accumulate) GEMM kernel utilizing the BF16 matrix\n instructions with reduced accuracy in cases where computation cannot\n tolerate the FP16 matrix instructions flushing subnormal FP16\n input/output data to zero. See the \"MI200 (gfx90a) Considerations\"\n section for more details."]
-    pub const rocblas_gemm_flags_fp16_alt_impl: rocblas_gemm_flags_ = rocblas_gemm_flags_(4);
+impl rocblas_geam_ex_operation_ {
+    pub const rocblas_geam_ex_operation_plus_min: rocblas_geam_ex_operation_ =
+        rocblas_geam_ex_operation_(1);
 }
 #[repr(transparent)]
-#[doc = " \\brief Control flags passed into gemm algorithms invoked by Tensile Host"]
+#[doc = " \\brief Which mathematical geam-like operation to perform for geam_ex"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_gemm_flags_(pub ::std::os::raw::c_uint);
-#[doc = " \\brief Control flags passed into gemm algorithms invoked by Tensile Host"]
-pub use self::rocblas_gemm_flags_ as rocblas_gemm_flags;
-impl rocblas_int8_type_for_hipblas_ {
-    pub const rocblas_int8_type_for_hipblas_default: rocblas_int8_type_for_hipblas_ =
-        rocblas_int8_type_for_hipblas_(0);
+pub struct rocblas_geam_ex_operation_(pub ::std::os::raw::c_int);
+#[doc = " \\brief Which mathematical geam-like operation to perform for geam_ex"]
+pub use self::rocblas_geam_ex_operation_ as rocblas_geam_ex_operation;
+impl rocblas_math_mode_ {
+    pub const rocblas_default_math: rocblas_math_mode_ = rocblas_math_mode_(0);
 }
-impl rocblas_int8_type_for_hipblas_ {
-    pub const rocblas_int8_type_for_hipblas_int8: rocblas_int8_type_for_hipblas_ =
-        rocblas_int8_type_for_hipblas_(1);
-}
-impl rocblas_int8_type_for_hipblas_ {
-    pub const rocblas_int8_type_for_hipblas_pack_int8x4: rocblas_int8_type_for_hipblas_ =
-        rocblas_int8_type_for_hipblas_(2);
+impl rocblas_math_mode_ {
+    pub const rocblas_xf32_xdl_math_op: rocblas_math_mode_ = rocblas_math_mode_(1);
 }
 #[repr(transparent)]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_int8_type_for_hipblas_(pub ::std::os::raw::c_uint);
-pub use self::rocblas_int8_type_for_hipblas_ as rocblas_int8_type_for_hipblas;
+pub struct rocblas_math_mode_(pub ::std::os::raw::c_int);
+pub use self::rocblas_math_mode_ as rocblas_math_mode;
 extern "C" {
     #[must_use]
     #[doc = " \\brief Create handle"]
@@ -389,23 +393,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\brief Set rocblas_int8_type_for_hipblas"]
-    pub fn rocblas_set_int8_type_for_hipblas(
-        handle: rocblas_handle,
-        int8_type: rocblas_int8_type_for_hipblas,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\brief Get rocblas_int8_type_for_hipblas"]
-    pub fn rocblas_get_int8_type_for_hipblas(
-        handle: rocblas_handle,
-        int8_type: *mut rocblas_int8_type_for_hipblas,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\brief Set rocblas_atomics_mode"]
+    #[doc = " \\brief Set rocblas_atomics_mode\n  \\details\n  Some rocBLAS functions may have implementations which use atomic operations to increase performance.\n  By using atomic operations, results are not guaranteed to be identical between multiple runs.\n  Results will be accurate with or without atomic operations, but if it is required to\n  have bit-wise reproducible results, atomic operations should not be used.\n\n  Atomic operations can be turned on or off for a handle by calling rocblas_set_atomics_mode.\n  By default, this is set to `rocblas_atomics_allowed`."]
     pub fn rocblas_set_atomics_mode(
         handle: rocblas_handle,
         atomics_mode: rocblas_atomics_mode,
@@ -421,10 +409,18 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\brief Query the preferable supported int8 input layout for gemm\n\\details\nIndicates the supported int8 input layout for gemm according to the device.\nIf the device supports packed-int8x4 (1) only, output flag is rocblas_gemm_flags_pack_int8x4\nand users must bitwise-or your flag with rocblas_gemm_flags_pack_int8x4.\nIf output flag is rocblas_gemm_flags_none (0), then unpacked int8 is preferable and suggested.\n@param[in]\nhandle      [rocblas_handle]\nthe handle of device\n@param[out]\nflag        pointer to rocblas_gemm_flags"]
-    pub fn rocblas_query_int8_layout_flag(
+    #[doc = " \\brief Set rocblas_math_mode"]
+    pub fn rocblas_set_math_mode(
         handle: rocblas_handle,
-        flag: *mut rocblas_gemm_flags,
+        math_mode: rocblas_math_mode,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Get rocblas_math_mode"]
+    pub fn rocblas_get_math_mode(
+        handle: rocblas_handle,
+        math_mode: *mut rocblas_math_mode,
     ) -> rocblas_status;
 }
 extern "C" {
@@ -569,7 +565,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nscal  scales each element of vector x with scalar alpha:\n\nx := alpha * x\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[inout]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nscal  scales each element of vector x with scalar alpha:\n\nx := alpha * x\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[in, out]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
     pub fn rocblas_sscal(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -630,7 +626,67 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nscal_batched  scales each element of vector x_i with scalar alpha, for i = 1, ... , batch_count:\n\nx_i := alpha * x_i,\nwhere (x_i) is the i-th instance of the batch.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn           [rocblas_int]\nthe number of elements in each x_i.\n@param[in]\nalpha       host pointer or device pointer for the scalar alpha.\n@param[inout]\nx           device array of device pointers storing each vector x_i.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of batches in x."]
+    pub fn rocblas_sscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *mut f32,
+        incx: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *mut f64,
+        incx: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nscal_batched  scales each element of vector x_i with scalar alpha, for i = 1, ... , batch_count:\n\nx_i := alpha * x_i,\nwhere (x_i) is the i-th instance of the batch.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn           [rocblas_int]\nthe number of elements in each x_i.\n@param[in]\nalpha       host pointer or device pointer for the scalar alpha.\n@param[in, out]\nx           device array of device pointers storing each vector x_i.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of batches in x.\n"]
     pub fn rocblas_sscal_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -697,7 +753,73 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nscal_strided_batched  scales each element of vector x_i with scalar alpha, for i = 1, ... , batch_count:\n\nx_i := alpha * x_i,\nwhere (x_i) is the i-th instance of the batch.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn           [rocblas_int]\nthe number of elements in each x_i.\n@param[in]\nalpha       host pointer or device pointer for the scalar alpha.\n@param[inout]\nx           device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nstride_x    [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size, for a typical\ncase this means stride_x >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of batches in x."]
+    pub fn rocblas_sscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *const *mut f32,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *const *mut f64,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nscal_strided_batched  scales each element of vector x_i with scalar alpha, for i = 1, ... , batch_count:\n\nx_i := alpha * x_i,\nwhere (x_i) is the i-th instance of the batch.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn           [rocblas_int]\nthe number of elements in each x_i.\n@param[in]\nalpha       host pointer or device pointer for the scalar alpha.\n@param[in, out]\nx           device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nstride_x    [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size, for a typical\ncase this means stride_x >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of batches in x.\n"]
     pub fn rocblas_sscal_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -768,6 +890,78 @@ extern "C" {
         batch_count: rocblas_int,
     ) -> rocblas_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ncopy  copies each element x[i] into y[i], for  i = 1 , ... , n:\n\ny := x\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x to be copied to y.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[out]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
@@ -813,6 +1007,50 @@ extern "C" {
         incy: rocblas_int,
     ) -> rocblas_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scopy_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ncopy_batched copies each element x_i[j] into y_i[j], for  j = 1 , ... , n; i = 1 , ... , batch_count:\n\ny_i := x_i,\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i to be copied to y_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
@@ -862,6 +1100,54 @@ extern "C" {
         batch_count: rocblas_int,
     ) -> rocblas_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scopy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ncopy_strided_batched copies each element x_i[j] into y_i[j], for  j = 1 , ... , n; i = 1 , ... , batch_count:\n\ny_i := x_i,\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i to be copied to y_i.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increments for the elements of vectors x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, the user should\ntake care to ensure that stride_x is of appropriate size. For a typical\ncase, this means stride_x >= n * incx.\n@param[out]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of vectors y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_y, However, ensure that stride_y is of appropriate size, for a typical\ncase this means stride_y >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
@@ -921,7 +1207,63 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ndot(u)  performs the dot product of vectors x and y:\n\nresult = x * y;\n\ndotc  performs the dot product of the conjugate of complex vector x and complex vector y.\n\nresult = conjugate (x) * y;\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nresult\ndevice pointer or host pointer to store the dot product.\nreturn is 0.0 if n <= 0.\n"]
+    pub fn rocblas_scopy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ndot(u)  performs the dot product of vectors x and y:\n\nresult = x * y;\n\ndotc  performs the dot product of the conjugate of complex vector x and complex vector y.\n\nresult = conjugate (x) * y;\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in, out]\nresult\ndevice pointer or host pointer to store the dot product.\nreturn is 0.0 if n <= 0.\n"]
     pub fn rocblas_sdot(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1018,7 +1360,103 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ndot_batched(u) performs a batch of dot products of vectors x and y:\n\nresult_i = x_i * y_i;\n\ndotc_batched  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[inout]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n"]
+    pub fn rocblas_sdot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        y: *const f32,
+        incy: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        y: *const f64,
+        incy: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_half,
+        incx: i64,
+        y: *const rocblas_half,
+        incy: i64,
+        result: *mut rocblas_half,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_bfloat16,
+        incx: i64,
+        y: *const rocblas_bfloat16,
+        incy: i64,
+        result: *mut rocblas_bfloat16,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ndot_batched(u) performs a batch of dot products of vectors x and y:\n\nresult_i = x_i * y_i;\n\ndotc_batched  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in, out]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n"]
     pub fn rocblas_sdot_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1123,7 +1561,111 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ndot_strided_batched(u)  performs a batch of dot products of vectors x and y:\n\nresult_i = x_i * y_i;\n\ndotc_strided_batched  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[inout]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n"]
+    pub fn rocblas_sdot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        y: *const *const f32,
+        incy: i64,
+        batch_count: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        y: *const *const f64,
+        incy: i64,
+        batch_count: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_half,
+        incx: i64,
+        y: *const *const rocblas_half,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_half,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_bfloat16,
+        incx: i64,
+        y: *const *const rocblas_bfloat16,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_bfloat16,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *const rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *const rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *const rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *const rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ndot_strided_batched(u)  performs a batch of dot products of vectors x and y:\n\nresult_i = x_i * y_i;\n\ndotc_strided_batched  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in, out]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n"]
     pub fn rocblas_sdot_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1244,7 +1786,127 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nswap  interchanges vectors x and y:\n\ny := x;\nx := y\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[inout]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_sdot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_half,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_half,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_half,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_bfloat16,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_bfloat16,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_bfloat16,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nswap  interchanges vectors x and y:\n\ny := x;\nx := y\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in, out]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in, out]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
     pub fn rocblas_sswap(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1289,7 +1951,51 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nswap_batched interchanges vectors x_i and y_i, for i = 1 , ... , batch_count:\n\ny_i := x_i;\nx_i := y_i\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[inout]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sswap_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nswap_batched interchanges vectors x_i and y_i, for i = 1 , ... , batch_count:\n\ny_i := x_i;\nx_i := y_i\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in, out]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in, out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_sswap_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1338,7 +2044,55 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nswap_strided_batched interchanges vectors x_i and y_i, for i = 1 , ... , batch_count:\n\ny_i := x_i;\nx_i := y_i\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[inout]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= n * incx.\n@param[inout]\ny         device pointer to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstridey   [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_y is of appropriate size. For a typical\ncase this means stride_y >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sswap_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f32,
+        incx: i64,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f64,
+        incx: i64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nswap_strided_batched interchanges vectors x_i and y_i, for i = 1 , ... , batch_count:\n\ny_i := x_i;\nx_i := y_i\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in, out]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= n * incx.\n@param[in, out]\ny         device pointer to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstridey   [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_y is of appropriate size. For a typical\ncase this means stride_y >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_sswap_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1395,7 +2149,75 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\naxpy   computes constant alpha multiplied by vector x, plus vector y:\n\ny := alpha * x + y\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[out]\ny         device pointer storing vector y.\n@param[inout]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_sswap_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\naxpy   computes constant alpha multiplied by vector x, plus vector y:\n\ny := alpha * x + y\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[out]\ny         device pointer storing vector y.\n@param[in, out]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_haxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_half,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        y: *mut rocblas_half,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
     pub fn rocblas_saxpy(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1418,18 +2240,6 @@ extern "C" {
         incy: rocblas_int,
     ) -> rocblas_status;
 }
-extern "C" {
-    #[must_use]
-    pub fn rocblas_haxpy(
-        handle: rocblas_handle,
-        n: rocblas_int,
-        alpha: *const rocblas_half,
-        x: *const rocblas_half,
-        incx: rocblas_int,
-        y: *mut rocblas_half,
-        incy: rocblas_int,
-    ) -> rocblas_status;
-}
 extern "C" {
     #[must_use]
     pub fn rocblas_caxpy(
@@ -1456,7 +2266,67 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\naxpy_batched compute y := alpha * x + y over a set of batched vectors.\n\n@param[in]\nhandle    rocblas_handle\nhandle to the rocblas library context queue.\n@param[in]\nn         rocblas_int\n@param[in]\nalpha     specifies the scalar alpha.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      rocblas_int\nspecifies the increment for the elements of x.\n@param[out]\ny         pointer storing vector y on the GPU.\n@param[inout]\nincy      rocblas_int\nspecifies the increment for the elements of y.\n\n@param[in]\nbatch_count rocblas_int\nnumber of instances in the batch.\n"]
+    pub fn rocblas_haxpy_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_half,
+        x: *const rocblas_half,
+        incx: i64,
+        y: *mut rocblas_half,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_saxpy_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_daxpy_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_caxpy_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zaxpy_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\naxpy_batched computes y := alpha * x + y over a set of batched vectors.\n\n@param[in]\nhandle    rocblas_handle\nhandle to the rocblas library context queue.\n@param[in]\nn         rocblas_int\n@param[in]\nalpha     specifies the scalar alpha.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      rocblas_int\nspecifies the increment for the elements of x.\n@param[in, out]\ny         pointer storing vector y on the GPU.\n@param[in]\nincy      rocblas_int\nspecifies the increment for the elements of y.\n\n@param[in]\nbatch_count rocblas_int\nnumber of instances in the batch.\n"]
     pub fn rocblas_haxpy_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1522,7 +2392,72 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\naxpy_strided_batched compute y := alpha * x + y over a set of strided batched vectors.\n\n@param[in]\nhandle    rocblas_handle\nhandle to the rocblas library context queue.\n@param[in]\nn         rocblas_int.\n@param[in]\nalpha     specifies the scalar alpha.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      rocblas_int\nspecifies the increment for the elements of x.\n@param[in]\nstridex   rocblas_stride\nspecifies the increment between vectors of x.\n@param[out]\ny         pointer storing vector y on the GPU.\n@param[inout]\nincy      rocblas_int\nspecifies the increment for the elements of y.\n@param[in]\nstridey   rocblas_stride\nspecifies the increment between vectors of y.\n\n@param[in]\nbatch_count rocblas_int\nnumber of instances in the batch.\n"]
+    pub fn rocblas_haxpy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_half,
+        x: *const *const rocblas_half,
+        incx: i64,
+        y: *const *mut rocblas_half,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_saxpy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: i64,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_daxpy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: i64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_caxpy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zaxpy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\naxpy_strided_batched computes y := alpha * x + y over a set of strided batched vectors.\n\n@param[in]\nhandle    rocblas_handle\nhandle to the rocblas library context queue.\n@param[in]\nn         rocblas_int.\n@param[in]\nalpha     specifies the scalar alpha.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      rocblas_int\nspecifies the increment for the elements of x.\n@param[in]\nstridex   rocblas_stride\nspecifies the increment between vectors of x.\n@param[in, out]\ny         pointer storing vector y on the GPU.\n@param[in]\nincy      rocblas_int\nspecifies the increment for the elements of y.\n@param[in]\nstridey   rocblas_stride\nspecifies the increment between vectors of y.\n\n@param[in]\nbatch_count rocblas_int\nnumber of instances in the batch.\n"]
     pub fn rocblas_haxpy_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1598,7 +2533,82 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nasum computes the sum of the magnitudes of elements of a real vector x,\nor the sum of magnitudes of the real and imaginary parts of elements if x is a complex vector.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x. incx must be > 0.\n@param[inout]\nresult\ndevice pointer or host pointer to store the asum product.\nreturn is 0.0 if n <= 0.\n"]
+    pub fn rocblas_haxpy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_half,
+        x: *const rocblas_half,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_half,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_saxpy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_daxpy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_caxpy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zaxpy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nasum computes the sum of the magnitudes of elements of a real vector x,\nor the sum of magnitudes of the real and imaginary parts of elements if x is a complex vector.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x. incx must be > 0.\n@param[in, out]\nresult\ndevice pointer or host pointer to store the asum result.\nreturn is 0.0 if n <= 0.\n"]
     pub fn rocblas_sasum(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1639,7 +2649,47 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nasum_batched computes the sum of the magnitudes of the elements in a batch of real vectors x_i,\nor the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex\nvector, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[out]\nresults\ndevice array or host array of batch_count size for results.\nreturn is 0.0 if n, incx<=0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_sasum_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dasum_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scasum_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dzasum_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nasum_batched computes the sum of the magnitudes of the elements in a batch of real vectors x_i,\nor the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex\nvector, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice array or host array of batch_count size for results.\nreturn is 0.0 if n, incx<=0.\n"]
     pub fn rocblas_sasum_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1684,7 +2734,51 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nasum_strided_batched computes the sum of the magnitudes of elements of a real vectors x_i,\nor the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex\nvector, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= n * incx.\n@param[out]\nresults\ndevice pointer or host pointer to array for storing contiguous batch_count results.\nreturn is 0.0 if n, incx<=0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_sasum_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dasum_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scasum_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dzasum_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nasum_strided_batched computes the sum of the magnitudes of elements of real vectors x_i,\nor the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex\nvector, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) to the next one (x_i+1).\nThere are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. For a typical\ncase this means stridex >= n * incx.\n@param[out]\nresults\ndevice pointer or host pointer to array for storing contiguous batch_count results.\nreturn is 0.0 if n, incx<=0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_sasum_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1733,7 +2827,55 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nnrm2 computes the euclidean norm of a real or complex vector:\n\nresult := sqrt( x'*x ) for real vectors\nresult := sqrt( x**H*x ) for complex vectors\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nresult\ndevice pointer or host pointer to store the nrm2 product.\nreturn is 0.0 if n, incx<=0."]
+    pub fn rocblas_sasum_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dasum_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scasum_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dzasum_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nnrm2 computes the euclidean norm of a real or complex vector:\n\nresult := sqrt( x'*x ) for real vectors\nresult := sqrt( x**H*x ) for complex vectors\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in, out]\nresult\ndevice pointer or host pointer to store the nrm2 result.\nreturn is 0.0 if n, incx<=0.\n"]
     pub fn rocblas_snrm2(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1772,6 +2914,46 @@ extern "C" {
         result: *mut f64,
     ) -> rocblas_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocblas_snrm2_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dnrm2_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scnrm2_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dznrm2_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nnrm2_batched computes the euclidean norm over a batch of real or complex vectors:\n\nresult := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count\nresult := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batch_count\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice pointer or host pointer to array of batch_count size for nrm2 results.\nreturn is 0.0 for each element if n <= 0, incx<=0.\n"]
@@ -1817,6 +2999,50 @@ extern "C" {
         results: *mut f64,
     ) -> rocblas_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocblas_snrm2_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dnrm2_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scnrm2_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dznrm2_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nnrm2_strided_batched computes the euclidean norm over a batch of real or complex vectors:\n\nresult := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count\nresult := sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batch_count\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice pointer or host pointer to array for storing contiguous batch_count results.\nreturn is 0.0 for each element if n <= 0, incx<=0.\n"]
@@ -1868,7 +3094,55 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namax finds the first index of the element of maximum magnitude of a vector x.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nresult\ndevice pointer or host pointer to store the amax index.\nreturn is 0.0 if n, incx<=0."]
+    pub fn rocblas_snrm2_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dnrm2_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scnrm2_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dznrm2_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namax finds the first index of the element of maximum magnitude of a vector x.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in, out]\nresult\ndevice pointer or host pointer to store the amax index.\nreturn is 0 if n, incx<=0.\n"]
     pub fn rocblas_isamax(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1909,7 +3183,47 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namax_batched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch. Must be > 0.\n@param[out]\nresult\ndevice or host array of pointers of batch_count size for results.\nreturn is 0 if n, incx<=0."]
+    pub fn rocblas_isamax_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamax_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamax_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamax_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namax_batched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch. Must be > 0.\n@param[out]\nresult\ndevice or host array of batch_count size for results.\nreturn is 0 if n, incx<=0.\n"]
     pub fn rocblas_isamax_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -1952,6 +3266,50 @@ extern "C" {
         result: *mut rocblas_int,
     ) -> rocblas_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocblas_isamax_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamax_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamax_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamax_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namax_strided_batched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstridex   [rocblas_stride]\nspecifies the pointer increment between one x_i and the next x_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresult\ndevice or host pointer for storing contiguous batch_count results.\nreturn is 0 if n <= 0, incx<=0.\n"]
@@ -2003,7 +3361,55 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namin finds the first index of the element of minimum magnitude of a vector x.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nresult\ndevice pointer or host pointer to store the amin index.\nreturn is 0.0 if n, incx<=0."]
+    pub fn rocblas_isamax_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamax_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamax_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamax_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namin finds the first index of the element of minimum magnitude of a vector x.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in, out]\nresult\ndevice pointer or host pointer to store the amin index.\nreturn is 0 if n, incx<=0.\n"]
     pub fn rocblas_isamin(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -2044,7 +3450,47 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namin_batched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch. Must be > 0.\n@param[out]\nresult\ndevice or host pointers to array of batch_count size for results.\nreturn is 0 if n, incx<=0."]
+    pub fn rocblas_isamin_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamin_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamin_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamin_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namin_batched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch. Must be > 0.\n@param[out]\nresult\ndevice or host pointers to array of batch_count size for results.\nreturn is 0 if n, incx<=0.\n"]
     pub fn rocblas_isamin_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -2087,6 +3533,50 @@ extern "C" {
         result: *mut rocblas_int,
     ) -> rocblas_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocblas_isamin_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamin_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamin_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamin_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namin_strided_batched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstridex   [rocblas_stride]\nspecifies the pointer increment between one x_i and the next x_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresult\ndevice or host pointer to array for storing contiguous batch_count results.\nreturn is 0 if n <= 0, incx<=0.\n"]
@@ -2138,7 +3628,55 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[inout]\nx       device pointer storing vector x.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of x.\n@param[inout]\ny       device pointer storing vector y.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of y.\n@param[in]\nc       device pointer or host pointer storing scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer storing scalar sine component of the rotation matrix.\n"]
+    pub fn rocblas_isamin_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamin_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamin_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamin_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[in, out]\nx       device pointer storing vector x.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of x.\n@param[in, out]\ny       device pointer storing vector y.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of y.\n@param[in]\nc       device pointer or host pointer storing scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer storing scalar sine component of the rotation matrix.\n"]
     pub fn rocblas_srot(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -2217,7 +3755,85 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot_batched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[inout]\nx       device array of deivce pointers storing each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[inout]\ny       device array of device pointers storing each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
+    pub fn rocblas_srot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        y: *mut f32,
+        incy: i64,
+        c: *const f32,
+        s: *const f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        y: *mut f64,
+        incy: i64,
+        c: *const f64,
+        s: *const f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        c: *const f32,
+        s: *const rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        c: *const f32,
+        s: *const f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        c: *const f64,
+        s: *const rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        c: *const f64,
+        s: *const f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot_batched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[in, out]\nx       device array of device pointers storing each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in, out]\ny       device array of device pointers storing each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
     pub fn rocblas_srot_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -2302,7 +3918,91 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot_strided_batched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory, location is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[inout]\nx       device pointer to the first vector x_1.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in]\nstride_x [rocblas_stride]\nspecifies the increment from the beginning of x_i to the beginning of x_(i+1).\n@param[inout]\ny       device pointer to the first vector y_1.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nstride_y [rocblas_stride]\nspecifies the increment from the beginning of y_i to the beginning of y_(i+1)\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
+    pub fn rocblas_srot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f32,
+        incx: i64,
+        y: *const *mut f32,
+        incy: i64,
+        c: *const f32,
+        s: *const f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f64,
+        incx: i64,
+        y: *const *mut f64,
+        incy: i64,
+        c: *const f64,
+        s: *const f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        c: *const f32,
+        s: *const rocblas_float_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        c: *const f32,
+        s: *const f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        c: *const f64,
+        s: *const rocblas_double_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        c: *const f64,
+        s: *const f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot_strided_batched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory, location is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[in, out]\nx       device pointer to the first vector x_1.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in]\nstride_x [rocblas_stride]\nspecifies the increment from the beginning of x_i to the beginning of x_(i+1).\n@param[in, out]\ny       device pointer to the first vector y_1.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nstride_y [rocblas_stride]\nspecifies the increment from the beginning of y_i to the beginning of y_(i+1)\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
     pub fn rocblas_srot_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -2399,7 +4099,103 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotg creates the Givens rotation matrix for the vector (a b).\nScalars c and s and arrays a and b may be stored in either host or device memory, location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\na       device pointer or host pointer to input vector element, overwritten with r.\n@param[inout]\nb       device pointer or host pointer to input vector element, overwritten with z.\n@param[inout]\nc       device pointer or host pointer to cosine element of Givens rotation.\n@param[inout]\ns       device pointer or host pointer sine element of Givens rotation.\n"]
+    pub fn rocblas_srot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut f32,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f32,
+        s: *const f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut f64,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f64,
+        s: *const f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f32,
+        s: *const rocblas_float_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f32,
+        s: *const f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f64,
+        s: *const rocblas_double_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f64,
+        s: *const f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotg creates the Givens rotation matrix for the vector (a b).\nScalars a, b, c, and s may be stored in either host or device memory, location is specified by\ncalling rocblas_set_pointer_mode. The computation uses the formulas\n\nsigma = sgn(a)    if |a| >  |b|\n= sgn(b)    if |b| >= |a|\nr = sigma*sqrt( a**2 + b**2 )\nc = 1; s = 0      if r = 0\nc = a/r; s = b/r  if r != 0\n\nThe subroutine also computes\n\nz = s    if |a| > |b|,\n= 1/c  if |b| >= |a| and c != 0\n= 1    if c = 0\n\nThis allows c and s to be reconstructed from z as follows:\n\nIf z = 1, set c = 0, s = 1.\nIf |z| < 1, set c = sqrt(1 - z**2) and s = z.\nIf |z| > 1, set c = 1/z and s = sqrt( 1 - c**2).\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in, out]\na       pointer to a, an element in vector (a,b), overwritten with r.\n@param[in, out]\nb       pointer to b, an element in vector (a,b), overwritten with z.\n@param[out]\nc       pointer to c, cosine element of Givens rotation.\n@param[out]\ns       pointer to s, sine element of Givens rotation.\n"]
     pub fn rocblas_srotg(
         handle: rocblas_handle,
         a: *mut f32,
@@ -2440,7 +4236,47 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotg_batched creates the Givens rotation matrix for the batched vectors (a_i b_i), for i = 1, ..., batch_count.\na, b, c, and s may be stored in either host or device memory, location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\na       device array of device pointers storing each single input vector element a_i, overwritten with r_i.\n@param[inout]\nb       device array of device pointers storing each single input vector element b_i, overwritten with z_i.\n@param[inout]\nc       device array of device pointers storing each cosine element of Givens rotation for the batch.\n@param[inout]\ns       device array of device pointers storing each sine element of Givens rotation for the batch.\n@param[in]\nbatch_count [rocblas_int]\nnumber of batches (length of arrays a, b, c, and s).\n"]
+    pub fn rocblas_srotg_64(
+        handle: rocblas_handle,
+        a: *mut f32,
+        b: *mut f32,
+        c: *mut f32,
+        s: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg_64(
+        handle: rocblas_handle,
+        a: *mut f64,
+        b: *mut f64,
+        c: *mut f64,
+        s: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg_64(
+        handle: rocblas_handle,
+        a: *mut rocblas_float_complex,
+        b: *mut rocblas_float_complex,
+        c: *mut f32,
+        s: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg_64(
+        handle: rocblas_handle,
+        a: *mut rocblas_double_complex,
+        b: *mut rocblas_double_complex,
+        c: *mut f64,
+        s: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotg_batched creates the Givens rotation matrix for the batched vectors (a_i b_i), for i = 1, ..., batch_count.\na, b, c, and s are host pointers to an array of device pointers on the device, where each device pointer points\nto a scalar value of a_i, b_i, c_i, or s_i.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in, out]\na       a, overwritten with r.\n@param[in, out]\nb       b overwritten with z.\n@param[out]\nc       cosine element of Givens rotation for the batch.\n@param[out]\ns       sine element of Givens rotation for the batch.\n@param[in]\nbatch_count [rocblas_int]\nnumber of batches (length of arrays a, b, c, and s).\n"]
     pub fn rocblas_srotg_batched(
         handle: rocblas_handle,
         a: *const *mut f32,
@@ -2485,7 +4321,51 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotg_strided_batched creates the Givens rotation matrix for the strided batched vectors (a_i b_i), for i = 1, ..., batch_count.\na, b, c, and s may be stored in either host or device memory, location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\na       device strided_batched pointer or host strided_batched pointer to first single input vector element a_1, overwritten with r.\n@param[in]\nstride_a [rocblas_stride]\ndistance between elements of a in batch (distance between a_i and a_(i + 1)).\n@param[inout]\nb       device strided_batched pointer or host strided_batched pointer to first single input vector element b_1, overwritten with z.\n@param[in]\nstride_b [rocblas_stride]\ndistance between elements of b in batch (distance between b_i and b_(i + 1)).\n@param[inout]\nc       device strided_batched pointer or host strided_batched pointer to first cosine element of Givens rotations c_1.\n@param[in]\nstride_c [rocblas_stride]\ndistance between elements of c in batch (distance between c_i and c_(i + 1)).\n@param[inout]\ns       device strided_batched pointer or host strided_batched pointer to sine element of Givens rotations s_1.\n@param[in]\nstride_s [rocblas_stride]\ndistance between elements of s in batch (distance between s_i and s_(i + 1)).\n@param[in]\nbatch_count [rocblas_int]\nnumber of batches (length of arrays a, b, c, and s).\n"]
+    pub fn rocblas_srotg_batched_64(
+        handle: rocblas_handle,
+        a: *const *mut f32,
+        b: *const *mut f32,
+        c: *const *mut f32,
+        s: *const *mut f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg_batched_64(
+        handle: rocblas_handle,
+        a: *const *mut f64,
+        b: *const *mut f64,
+        c: *const *mut f64,
+        s: *const *mut f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg_batched_64(
+        handle: rocblas_handle,
+        a: *const *mut rocblas_float_complex,
+        b: *const *mut rocblas_float_complex,
+        c: *const *mut f32,
+        s: *const *mut rocblas_float_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg_batched_64(
+        handle: rocblas_handle,
+        a: *const *mut rocblas_double_complex,
+        b: *const *mut rocblas_double_complex,
+        c: *const *mut f64,
+        s: *const *mut rocblas_double_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotg_strided_batched creates the Givens rotation matrix for the strided batched vectors (a_i b_i), for i = 1, ..., batch_count.\na, b, c, and s are host pointers to arrays a, b, c, s on the device.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in, out]\na       host pointer to first single input vector element a_1 on the device, overwritten with r.\n@param[in]\nstride_a [rocblas_stride]\ndistance between elements of a in batch (distance between a_i and a_(i + 1)).\n@param[in, out]\nb       host pointer to first single input vector element b_1 on the device, overwritten with z.\n@param[in]\nstride_b [rocblas_stride]\ndistance between elements of b in batch (distance between b_i and b_(i + 1)).\n@param[out]\nc       host pointer to first single cosine element of Givens rotations c_1 on the device.\n@param[in]\nstride_c [rocblas_stride]\ndistance between elements of c in batch (distance between c_i and c_(i + 1)).\n@param[out]\ns       host pointer to first single sine element of Givens rotations s_1 on the device.\n@param[in]\nstride_s [rocblas_stride]\ndistance between elements of s in batch (distance between s_i and s_(i + 1)).\n@param[in]\nbatch_count [rocblas_int]\nnumber of batches (length of arrays a, b, c, and s).\n"]
     pub fn rocblas_srotg_strided_batched(
         handle: rocblas_handle,
         a: *mut f32,
@@ -2546,7 +4426,67 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotm applies the modified Givens rotation matrix defined by param to vectors x and y.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[inout]\nx       device pointer storing vector x.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of x.\n@param[inout]\ny       device pointer storing vector y.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of y.\n@param[in]\nparam   device vector or host vector of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory,\nlocation is specified by calling rocblas_set_pointer_mode.\n"]
+    pub fn rocblas_srotg_strided_batched_64(
+        handle: rocblas_handle,
+        a: *mut f32,
+        stride_a: rocblas_stride,
+        b: *mut f32,
+        stride_b: rocblas_stride,
+        c: *mut f32,
+        stride_c: rocblas_stride,
+        s: *mut f32,
+        stride_s: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg_strided_batched_64(
+        handle: rocblas_handle,
+        a: *mut f64,
+        stride_a: rocblas_stride,
+        b: *mut f64,
+        stride_b: rocblas_stride,
+        c: *mut f64,
+        stride_c: rocblas_stride,
+        s: *mut f64,
+        stride_s: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg_strided_batched_64(
+        handle: rocblas_handle,
+        a: *mut rocblas_float_complex,
+        stride_a: rocblas_stride,
+        b: *mut rocblas_float_complex,
+        stride_b: rocblas_stride,
+        c: *mut f32,
+        stride_c: rocblas_stride,
+        s: *mut rocblas_float_complex,
+        stride_s: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg_strided_batched_64(
+        handle: rocblas_handle,
+        a: *mut rocblas_double_complex,
+        stride_a: rocblas_stride,
+        b: *mut rocblas_double_complex,
+        stride_b: rocblas_stride,
+        c: *mut f64,
+        stride_c: rocblas_stride,
+        s: *mut rocblas_double_complex,
+        stride_s: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotm applies the modified Givens rotation matrix defined by param to vectors x and y.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[in, out]\nx       device pointer storing vector x.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of x.\n@param[in, out]\ny       device pointer storing vector y.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of y.\n@param[in]\nparam   device vector or host vector of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory,\nlocation is specified by calling rocblas_set_pointer_mode.\n"]
     pub fn rocblas_srotm(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -2571,7 +4511,31 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotm_batched applies the modified Givens rotation matrix defined by param_i to batched vectors x_i and y_i, for i = 1, ..., batch_count.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[inout]\nx       device array of device pointers storing each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[inout]\ny       device array of device pointers storing each vector y_1.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nparam   device array of device vectors of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may ONLY be stored on the device for the batched version of this function.\n\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
+    pub fn rocblas_srotm_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        y: *mut f32,
+        incy: i64,
+        param: *const f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        y: *mut f64,
+        incy: i64,
+        param: *const f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotm_batched applies the modified Givens rotation matrix defined by param_i to batched vectors x_i and y_i, for i = 1, ..., batch_count.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[in, out]\nx       device array of device pointers storing each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in, out]\ny       device array of device pointers storing each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nparam   device array of device vectors of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may ONLY be stored on the device for the batched version of this function.\n\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
     pub fn rocblas_srotm_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -2598,7 +4562,33 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotm_strided_batched applies the modified Givens rotation matrix defined by param_i to strided batched vectors x_i and y_i, for i = 1, ..., batch_count\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[inout]\nx       device pointer pointing to first strided batched vector x_1.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in]\nstride_x [rocblas_stride]\nspecifies the increment between the beginning of x_i and x_(i + 1)\n@param[inout]\ny       device pointer pointing to first strided batched vector y_1.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nstride_y [rocblas_stride]\nspecifies the increment between the beginning of y_i and y_(i + 1).\n@param[in]\nparam   device pointer pointing to first array of 5 elements defining the rotation (param_1).\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may ONLY be stored on the device for the strided_batched\nversion of this function.\n\n@param[in]\nstride_param [rocblas_stride]\nspecifies the increment between the beginning of param_i and param_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
+    pub fn rocblas_srotm_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f32,
+        incx: i64,
+        y: *const *mut f32,
+        incy: i64,
+        param: *const *const f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f64,
+        incx: i64,
+        y: *const *mut f64,
+        incy: i64,
+        param: *const *const f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotm_strided_batched applies the modified Givens rotation matrix defined by param_i to strided batched vectors x_i and y_i, for i = 1, ..., batch_count\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[in, out]\nx       device pointer pointing to first strided batched vector x_1.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in]\nstride_x [rocblas_stride]\nspecifies the increment between the beginning of x_i and x_(i + 1)\n@param[in, out]\ny       device pointer pointing to first strided batched vector y_1.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nstride_y [rocblas_stride]\nspecifies the increment between the beginning of y_i and y_(i + 1).\n@param[in]\nparam   device pointer pointing to first array of 5 elements defining the rotation (param_1).\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may ONLY be stored on the device for the strided_batched\nversion of this function.\n\n@param[in]\nstride_param [rocblas_stride]\nspecifies the increment between the beginning of param_i and param_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
     pub fn rocblas_srotm_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -2631,7 +4621,39 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotmg creates the modified Givens rotation matrix for the vector (d1 * x1, d2 * y1).\nParameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\nd1      device pointer or host pointer to input scalar that is overwritten.\n@param[inout]\nd2      device pointer or host pointer to input scalar that is overwritten.\n@param[inout]\nx1      device pointer or host pointer to input scalar that is overwritten.\n@param[in]\ny1      device pointer or host pointer to input scalar.\n@param[out]\nparam   device vector or host vector of five elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory.\nLocation is specified by calling rocblas_set_pointer_mode.\n"]
+    pub fn rocblas_srotm_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut f32,
+        incy: i64,
+        stride_y: rocblas_stride,
+        param: *const f32,
+        stride_param: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut f64,
+        incy: i64,
+        stride_y: rocblas_stride,
+        param: *const f64,
+        stride_param: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotmg creates the modified Givens rotation matrix for the vector (d1 * x1, d2 * y1).\nParameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in, out]\nd1      device pointer or host pointer to input scalar that is overwritten.\n@param[in, out]\nd2      device pointer or host pointer to input scalar that is overwritten.\n@param[in, out]\nx1      device pointer or host pointer to input scalar that is overwritten.\n@param[in]\ny1      device pointer or host pointer to input scalar.\n@param[out]\nparam   device vector or host vector of five elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory.\nLocation is specified by calling rocblas_set_pointer_mode.\n"]
     pub fn rocblas_srotmg(
         handle: rocblas_handle,
         d1: *mut f32,
@@ -2654,7 +4676,29 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotmg_batched creates the modified Givens rotation matrix for the batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batch_count.\nParameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\nd1      device batched array or host batched array of input scalars that is overwritten.\n@param[inout]\nd2      device batched array or host batched array of input scalars that is overwritten.\n@param[inout]\nx1      device batched array or host batched array of input scalars that is overwritten.\n@param[in]\ny1      device batched array or host batched array of input scalars.\n@param[out]\nparam   device batched array or host batched array of vectors of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory.\nLocation is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nbatch_count [rocblas_int]\nthe number of instances in the batch.\n"]
+    pub fn rocblas_srotmg_64(
+        handle: rocblas_handle,
+        d1: *mut f32,
+        d2: *mut f32,
+        x1: *mut f32,
+        y1: *const f32,
+        param: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg_64(
+        handle: rocblas_handle,
+        d1: *mut f64,
+        d2: *mut f64,
+        x1: *mut f64,
+        y1: *const f64,
+        param: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotmg_batched creates the modified Givens rotation matrix for the batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batch_count.\nParameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in, out]\nd1      device batched array or host batched array of input scalars that is overwritten.\n@param[in, out]\nd2      device batched array or host batched array of input scalars that is overwritten.\n@param[in, out]\nx1      device batched array or host batched array of input scalars that is overwritten.\n@param[in]\ny1      device batched array or host batched array of input scalars.\n@param[out]\nparam   device batched array or host batched array of vectors of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory.\nLocation is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nbatch_count [rocblas_int]\nthe number of instances in the batch.\n"]
     pub fn rocblas_srotmg_batched(
         handle: rocblas_handle,
         d1: *const *mut f32,
@@ -2679,7 +4723,31 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotmg_strided_batched creates the modified Givens rotation matrix for the strided batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batch_count.\nParameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\nd1      device strided_batched array or host strided_batched array of input scalars that is overwritten.\n@param[in]\nstride_d1 [rocblas_stride]\nspecifies the increment between the beginning of d1_i and d1_(i+1).\n@param[inout]\nd2      device strided_batched array or host strided_batched array of input scalars that is overwritten.\n@param[in]\nstride_d2 [rocblas_stride]\nspecifies the increment between the beginning of d2_i and d2_(i+1).\n@param[inout]\nx1      device strided_batched array or host strided_batched array of input scalars that is overwritten.\n@param[in]\nstride_x1 [rocblas_stride]\nspecifies the increment between the beginning of x1_i and x1_(i+1).\n@param[in]\ny1      device strided_batched array or host strided_batched array of input scalars.\n@param[in]\nstride_y1 [rocblas_stride]\nspecifies the increment between the beginning of y1_i and y1_(i+1).\n@param[out]\nparam   device strided_batched array or host strided_batched array of vectors of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( 
H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory.\nLocation is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nstride_param [rocblas_stride]\nspecifies the increment between the beginning of param_i and param_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nthe number of instances in the batch.\n"]
+    pub fn rocblas_srotmg_batched_64(
+        handle: rocblas_handle,
+        d1: *const *mut f32,
+        d2: *const *mut f32,
+        x1: *const *mut f32,
+        y1: *const *const f32,
+        param: *const *mut f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg_batched_64(
+        handle: rocblas_handle,
+        d1: *const *mut f64,
+        d2: *const *mut f64,
+        x1: *const *mut f64,
+        y1: *const *const f64,
+        param: *const *mut f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotmg_strided_batched creates the modified Givens rotation matrix for the strided batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batch_count.\nParameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in, out]\nd1      device strided_batched array or host strided_batched array of input scalars that is overwritten.\n@param[in]\nstride_d1 [rocblas_stride]\nspecifies the increment between the beginning of d1_i and d1_(i+1).\n@param[in, out]\nd2      device strided_batched array or host strided_batched array of input scalars that is overwritten.\n@param[in]\nstride_d2 [rocblas_stride]\nspecifies the increment between the beginning of d2_i and d2_(i+1).\n@param[in, out]\nx1      device strided_batched array or host strided_batched array of input scalars that is overwritten.\n@param[in]\nstride_x1 [rocblas_stride]\nspecifies the increment between the beginning of x1_i and x1_(i+1).\n@param[in]\ny1      device strided_batched array or host strided_batched array of input scalars.\n@param[in]\nstride_y1 [rocblas_stride]\nspecifies the increment between the beginning of y1_i and y1_(i+1).\n@param[out]\nparam   device strided_batched array or host strided_batched array of vectors of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => 
H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory.\nLocation is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nstride_param [rocblas_stride]\nspecifies the increment between the beginning of param_i and param_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nthe number of instances in the batch.\n"]
     pub fn rocblas_srotmg_strided_batched(
         handle: rocblas_handle,
         d1: *mut f32,
@@ -2714,7 +4782,41 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngbmv performs one of the matrix-vector operations:\n\ny := alpha*A*x    + beta*y,   or\ny := alpha*A**T*x + beta*y,   or\ny := alpha*A**H*x + beta*y,\nwhere alpha and beta are scalars, x and y are vectors and A is an\nm by n banded matrix with kl sub-diagonals and ku super-diagonals.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of matrix A.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrix A.\n@param[in]\nkl        [rocblas_int]\nnumber of sub-diagonals of A.\n@param[in]\nku        [rocblas_int]\nnumber of super-diagonals of A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA     device pointer storing banded matrix A.\nLeading (kl + ku + 1) by n part of the matrix contains the coefficients\nof the banded matrix. The leading diagonal resides in row (ku + 1) with\nthe first super-diagonal above on the RHS of row ku. The first sub-diagonal\nresides below on the LHS of row ku + 2. This propagates up and down across\nsub/super-diagonals.\n\nEx: (m = n = 7; ku = 2, kl = 2)\n1 2 3 0 0 0 0             0 0 3 3 3 3 3\n4 1 2 3 0 0 0             0 2 2 2 2 2 2\n5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1\n0 5 4 1 2 3 0             4 4 4 4 4 4 0\n0 0 5 4 1 2 0             5 5 5 5 5 0 0\n0 0 0 5 4 1 2             0 0 0 0 0 0 0\n0 0 0 0 5 4 1             0 0 0 0 0 0 0\n\nNote that the empty elements which do not correspond to data will not\nbe referenced.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. 
Must be >= (kl + ku + 1).\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_srotmg_strided_batched_64(
+        handle: rocblas_handle,
+        d1: *mut f32,
+        stride_d1: rocblas_stride,
+        d2: *mut f32,
+        stride_d2: rocblas_stride,
+        x1: *mut f32,
+        stride_x1: rocblas_stride,
+        y1: *const f32,
+        stride_y1: rocblas_stride,
+        param: *mut f32,
+        stride_param: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg_strided_batched_64(
+        handle: rocblas_handle,
+        d1: *mut f64,
+        stride_d1: rocblas_stride,
+        d2: *mut f64,
+        stride_d2: rocblas_stride,
+        x1: *mut f64,
+        stride_x1: rocblas_stride,
+        y1: *const f64,
+        stride_y1: rocblas_stride,
+        param: *mut f64,
+        stride_param: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngbmv performs one of the matrix-vector operations:\n\ny := alpha*A*x    + beta*y,   or\ny := alpha*A**T*x + beta*y,   or\ny := alpha*A**H*x + beta*y,\nwhere alpha and beta are scalars, x and y are vectors and A is an\nm by n banded matrix with kl sub-diagonals and ku super-diagonals.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of matrix A.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrix A.\n@param[in]\nkl        [rocblas_int]\nnumber of sub-diagonals of A.\n@param[in]\nku        [rocblas_int]\nnumber of super-diagonals of A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA     device pointer storing banded matrix A.\nLeading (kl + ku + 1) by n part of the matrix contains the coefficients\nof the banded matrix. The leading diagonal resides in row (ku + 1) with\nthe first super-diagonal above on the RHS of row ku. The first sub-diagonal\nresides below on the LHS of row ku + 2. This propagates up and down across\nsub/super-diagonals.\n\nEx: (m = n = 7; ku = 2, kl = 2)\n1 2 3 0 0 0 0             0 0 3 3 3 3 3\n4 1 2 3 0 0 0             0 2 2 2 2 2 2\n5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1\n0 5 4 1 2 3 0             4 4 4 4 4 4 0\n0 0 5 4 1 2 0             5 5 5 5 5 0 0\n0 0 0 5 4 1 2             0 0 0 0 0 0 0\n0 0 0 0 5 4 1             0 0 0 0 0 0 0\n\nNote that the empty elements which do not correspond to data will not\nbe referenced.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. 
Must be >= (kl + ku + 1).\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
     pub fn rocblas_sgbmv(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -2791,7 +4893,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngbmv_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n banded matrix with kl sub-diagonals and ku super-diagonals,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of each matrix A_i.\n@param[in]\nn         [rocblas_int]\nnumber of columns of each matrix A_i.\n@param[in]\nkl        [rocblas_int]\nnumber of sub-diagonals of each A_i.\n@param[in]\nku        [rocblas_int]\nnumber of super-diagonals of each A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA     device array of device pointers storing each banded matrix A_i.\nLeading (kl + ku + 1) by n part of the matrix contains the coefficients\nof the banded matrix. The leading diagonal resides in row (ku + 1) with\nthe first super-diagonal above on the RHS of row ku. The first sub-diagonal\nresides below on the LHS of row ku + 2. This propagates up and down across\nsub/super-diagonals.\n\nEx: (m = n = 7; ku = 2, kl = 2)\n1 2 3 0 0 0 0             0 0 3 3 3 3 3\n4 1 2 3 0 0 0             0 2 2 2 2 2 2\n5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1\n0 5 4 1 2 3 0             4 4 4 4 4 4 0\n0 0 5 4 1 2 0             5 5 5 5 5 0 0\n0 0 0 5 4 1 2             0 0 0 0 0 0 0\n0 0 0 0 5 4 1             0 0 0 0 0 0 0\n\nNote that the empty elements which do not correspond to data will not\nbe referenced.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. 
Must be >= (kl + ku + 1)\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngbmv_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n banded matrix with kl sub-diagonals and ku super-diagonals,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of each matrix A_i.\n@param[in]\nn         [rocblas_int]\nnumber of columns of each matrix A_i.\n@param[in]\nkl        [rocblas_int]\nnumber of sub-diagonals of each A_i.\n@param[in]\nku        [rocblas_int]\nnumber of super-diagonals of each A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA     device array of device pointers storing each banded matrix A_i.\nLeading (kl + ku + 1) by n part of the matrix contains the coefficients\nof the banded matrix. The leading diagonal resides in row (ku + 1) with\nthe first super-diagonal above on the RHS of row ku. The first sub-diagonal\nresides below on the LHS of row ku + 2. This propagates up and down across\nsub/super-diagonals.\n\nEx: (m = n = 7; ku = 2, kl = 2)\n1 2 3 0 0 0 0             0 0 3 3 3 3 3\n4 1 2 3 0 0 0             0 2 2 2 2 2 2\n5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1\n0 5 4 1 2 3 0             4 4 4 4 4 4 0\n0 0 5 4 1 2 0             5 5 5 5 5 0 0\n0 0 0 5 4 1 2             0 0 0 0 0 0 0\n0 0 0 0 5 4 1             0 0 0 0 0 0 0\n\nNote that the empty elements which do not correspond to data will not\nbe referenced.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. 
Must be >= (kl + ku + 1)\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
     pub fn rocblas_sgbmv_batched(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -2872,7 +4974,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngbmv_strided_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n banded matrix with kl sub-diagonals and ku super-diagonals,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of matrix A.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrix A.\n@param[in]\nkl        [rocblas_int]\nnumber of sub-diagonals of A.\n@param[in]\nku        [rocblas_int]\nnumber of super-diagonals of A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA     device pointer to first banded matrix (A_1).\nLeading (kl + ku + 1) by n part of the matrix contains the coefficients\nof the banded matrix. The leading diagonal resides in row (ku + 1) with\nthe first super-diagonal above on the RHS of row ku. The first sub-diagonal\nresides below on the LHS of row ku + 2. This propagates up and down across\nsub/super-diagonals.\n\nEx: (m = n = 7; ku = 2, kl = 2)\n1 2 3 0 0 0 0             0 0 3 3 3 3 3\n4 1 2 3 0 0 0             0 2 2 2 2 2 2\n5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1\n0 5 4 1 2 3 0             4 4 4 4 4 4 0\n0 0 5 4 1 2 0             5 5 5 5 5 0 0\n0 0 0 5 4 1 2             0 0 0 0 0 0 0\n0 0 0 0 5 4 1             0 0 0 0 0 0 0\n\nNote that the empty elements which do not correspond to data will not\nbe referenced.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. 
Must be >= (kl + ku + 1).\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx         device pointer to first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer to first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (x_i+1).\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngbmv_strided_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n banded matrix with kl sub-diagonals and ku super-diagonals,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of matrix A.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrix A.\n@param[in]\nkl        [rocblas_int]\nnumber of sub-diagonals of A.\n@param[in]\nku        [rocblas_int]\nnumber of super-diagonals of A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA     device pointer to first banded matrix (A_1).\nLeading (kl + ku + 1) by n part of the matrix contains the coefficients\nof the banded matrix. The leading diagonal resides in row (ku + 1) with\nthe first super-diagonal above on the RHS of row ku. The first sub-diagonal\nresides below on the LHS of row ku + 2. This propagates up and down across\nsub/super-diagonals.\n\nEx: (m = n = 7; ku = 2, kl = 2)\n1 2 3 0 0 0 0             0 0 3 3 3 3 3\n4 1 2 3 0 0 0             0 2 2 2 2 2 2\n5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1\n0 5 4 1 2 3 0             4 4 4 4 4 4 0\n0 0 5 4 1 2 0             5 5 5 5 5 0 0\n0 0 0 5 4 1 2             0 0 0 0 0 0 0\n0 0 0 0 5 4 1             0 0 0 0 0 0 0\n\nNote that the empty elements which do not correspond to data will not\nbe referenced.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. 
Must be >= (kl + ku + 1).\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx         device pointer to first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device pointer to first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (x_i+1).\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
     pub fn rocblas_sgbmv_strided_batched(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -2965,7 +5067,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngemv performs one of the matrix-vector operations:\n\ny := alpha*A*x    + beta*y,   or\ny := alpha*A**T*x + beta*y,   or\ny := alpha*A**H*x + beta*y,\nwhere alpha and beta are scalars, x and y are vectors and A is an\nm by n matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of matrix A.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrix A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngemv performs one of the matrix-vector operations:\n\ny := alpha*A*x    + beta*y,   or\ny := alpha*A**T*x + beta*y,   or\ny := alpha*A**H*x + beta*y,\nwhere alpha and beta are scalars, x and y are vectors and A is an\nm by n matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of matrix A.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrix A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
     pub fn rocblas_sgemv(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -3034,7 +5136,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngemv_batched performs a batch of matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans       [rocblas_operation]\nindicates whether matrices A_i are tranposed (conjugated) or not.\n@param[in]\nm           [rocblas_int]\nnumber of rows of each matrix A_i.\n@param[in]\nn           [rocblas_int]\nnumber of columns of each matrix A_i.\n@param[in]\nalpha       device pointer or host pointer to scalar alpha.\n@param[in]\nA           device array of device pointers storing each matrix A_i.\n@param[in]\nlda         [rocblas_int]\nspecifies the leading dimension of each matrix A_i.\n@param[in]\nx           device array of device pointers storing each vector x_i.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\nbeta        device pointer or host pointer to scalar beta.\n@param[inout]\ny           device array of device pointers storing each vector y_i.\n@param[in]\nincy        [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngemv_batched performs a batch of matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans       [rocblas_operation]\nindicates whether matrices A_i are tranposed (conjugated) or not.\n@param[in]\nm           [rocblas_int]\nnumber of rows of each matrix A_i.\n@param[in]\nn           [rocblas_int]\nnumber of columns of each matrix A_i.\n@param[in]\nalpha       device pointer or host pointer to scalar alpha.\n@param[in]\nA           device array of device pointers storing each matrix A_i.\n@param[in]\nlda         [rocblas_int]\nspecifies the leading dimension of each matrix A_i.\n@param[in]\nx           device array of device pointers storing each vector x_i.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\nbeta        device pointer or host pointer to scalar beta.\n@param[in, out]\ny           device array of device pointers storing each vector y_i.\n@param[in]\nincy        [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_sgemv_batched(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -3107,7 +5209,79 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngemv_strided_batched performs a batch of matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA      [rocblas_operation]\nindicates whether matrices A_i are tranposed (conjugated) or not.\n@param[in]\nm           [rocblas_int]\nnumber of rows of matrices A_i.\n@param[in]\nn           [rocblas_int]\nnumber of columns of matrices A_i.\n@param[in]\nalpha       device pointer or host pointer to scalar alpha.\n@param[in]\nA           device pointer to the first matrix (A_1) in the batch.\n@param[in]\nlda         [rocblas_int]\nspecifies the leading dimension of matrices A_i.\n@param[in]\nstrideA     [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx           device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of vectors x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. 
When trans equals rocblas_operation_none\nthis typically means stride_x >= n * incx, otherwise stride_x >= m * incx.\n@param[in]\nbeta        device pointer or host pointer to scalar beta.\n@param[inout]\ny           device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy        [rocblas_int]\nspecifies the increment for the elements of vectors y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_y. However, ensure that stride_y is of appropriate size. When trans equals rocblas_operation_none\nthis typically means stride_y >= m * incy, otherwise stride_y >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_hshgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const rocblas_half,
+        lda: rocblas_int,
+        x: *const *const rocblas_half,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut rocblas_half,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hssgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const rocblas_half,
+        lda: rocblas_int,
+        x: *const *const rocblas_half,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tstgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const rocblas_bfloat16,
+        lda: rocblas_int,
+        x: *const *const rocblas_bfloat16,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut rocblas_bfloat16,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tssgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const rocblas_bfloat16,
+        lda: rocblas_int,
+        x: *const *const rocblas_bfloat16,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngemv_strided_batched performs a batch of matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA      [rocblas_operation]\nindicates whether matrices A_i are tranposed (conjugated) or not.\n@param[in]\nm           [rocblas_int]\nnumber of rows of matrices A_i.\n@param[in]\nn           [rocblas_int]\nnumber of columns of matrices A_i.\n@param[in]\nalpha       device pointer or host pointer to scalar alpha.\n@param[in]\nA           device pointer to the first matrix (A_1) in the batch.\n@param[in]\nlda         [rocblas_int]\nspecifies the leading dimension of matrices A_i.\n@param[in]\nstrideA     [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx           device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of vectors x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. 
When trans equals rocblas_operation_none\nthis typically means stride_x >= n * incx, otherwise stride_x >= m * incx.\n@param[in]\nbeta        device pointer or host pointer to scalar beta.\n@param[in, out]\ny           device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy        [rocblas_int]\nspecifies the increment for the elements of vectors y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_y. However, ensure that stride_y is of appropriate size. When trans equals rocblas_operation_none\nthis typically means stride_y >= m * incy, otherwise stride_y >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_sgemv_strided_batched(
         handle: rocblas_handle,
         transA: rocblas_operation,
@@ -3192,7 +5366,91 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhbmv performs the matrix-vector operations:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and A is an\nn by n Hermitian band matrix, with k super-diagonals.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: The upper triangular part of A is being supplied.\n- rocblas_fill_lower: The lower triangular part of A is being supplied.\n@param[in]\nn         [rocblas_int]\nthe order of the matrix A.\n@param[in]\nk         [rocblas_int]\nthe number of super-diagonals of the matrix A. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device pointer storing matrix A. Of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe leading (k + 1) by n part of A must contain the upper\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (k + 1), the first super-diagonal on the RHS\nof row k, etc.\nThe top left k by x triangle of A will not be referenced.\nEx (upper, lda = n = 4, k = 1):\nA                             Represented matrix\n(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)\n(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)\n\nif uplo == rocblas_fill_lower:\nThe leading (k + 1) by n part of A must contain the lower\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (1), the first sub-diagonal on the LHS of\nrow 2, etc.\nThe bottom right k by k triangle of A will not be referenced.\nEx (lower, lda = 2, n = 4, k = 1):\nA                               Represented matrix\n(1,0) (2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)\n(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)\n(0, 0) (6, 8) (3, 0) 
(7,-7)\n(0, 0) (0, 0) (7, 7) (4, 0)\n\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof A will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. must be >= k + 1.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_hshgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const rocblas_half,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut rocblas_half,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hssgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const rocblas_half,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tstgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const rocblas_bfloat16,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_bfloat16,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut rocblas_bfloat16,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tssgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const rocblas_bfloat16,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_bfloat16,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhbmv performs the matrix-vector operations:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and A is an\nn by n Hermitian band matrix, with k super-diagonals.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: The upper triangular part of A is being supplied.\n- rocblas_fill_lower: The lower triangular part of A is being supplied.\n@param[in]\nn         [rocblas_int]\nthe order of the matrix A.\n@param[in]\nk         [rocblas_int]\nthe number of super-diagonals of the matrix A. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device pointer storing matrix A. Of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe leading (k + 1) by n part of A must contain the upper\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (k + 1), the first super-diagonal on the RHS\nof row k, etc.\nThe top left k by x triangle of A will not be referenced.\nEx (upper, lda = n = 4, k = 1):\nA                             Represented matrix\n(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)\n(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)\n\nif uplo == rocblas_fill_lower:\nThe leading (k + 1) by n part of A must contain the lower\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (1), the first sub-diagonal on the LHS of\nrow 2, etc.\nThe bottom right k by k triangle of A will not be referenced.\nEx (lower, lda = 2, n = 4, k = 1):\nA                               Represented matrix\n(1,0) (2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)\n(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)\n(0, 0) (6, 8) (3, 0) 
(7,-7)\n(0, 0) (0, 0) (7, 7) (4, 0)\n\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof A will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. must be >= k + 1.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
     pub fn rocblas_chbmv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3227,7 +5485,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhbmv_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: The upper triangular part of each A_i is being supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is being supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nk         [rocblas_int]\nthe number of super-diagonals of each matrix A_i. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix_i A of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe leading (k + 1) by n part of each A_i must contain the upper\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (k + 1), the first super-diagonal on the RHS\nof row k, etc.\nThe top left k by x triangle of each A_i will not be referenced.\nEx (upper, lda = n = 4, k = 1):\nA                             Represented matrix\n(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)\n(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)\n\nif uplo == rocblas_fill_lower:\nThe leading (k + 1) by n part of each A_i must contain the lower\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (1), the first sub-diagonal on the LHS of\nrow 2, etc.\nThe bottom right k by k triangle of each A_i will not be referenced.\nEx (lower, lda = 2, n = 4, k = 1):\nA                               Represented matrix\n(1,0) 
(2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)\n(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)\n(0, 0) (6, 8) (3, 0) (7,-7)\n(0, 0) (0, 0) (7, 7) (4, 0)\n\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. must be >= max(1, n).\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhbmv_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: The upper triangular part of each A_i is being supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is being supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nk         [rocblas_int]\nthe number of super-diagonals of each matrix A_i. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix_i A of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe leading (k + 1) by n part of each A_i must contain the upper\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (k + 1), the first super-diagonal on the RHS\nof row k, etc.\nThe top left k by x triangle of each A_i will not be referenced.\nEx (upper, lda = n = 4, k = 1):\nA                             Represented matrix\n(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)\n(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)\n\nif uplo == rocblas_fill_lower:\nThe leading (k + 1) by n part of each A_i must contain the lower\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (1), the first sub-diagonal on the LHS of\nrow 2, etc.\nThe bottom right k by k triangle of each A_i will not be referenced.\nEx (lower, lda = 2, n = 4, k = 1):\nA                               Represented matrix\n(1,0) 
(2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)\n(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)\n(0, 0) (6, 8) (3, 0) (7,-7)\n(0, 0) (0, 0) (7, 7) (4, 0)\n\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. must be >= max(1, n).\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_chbmv_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3264,7 +5522,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhbmv_strided_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: The upper triangular part of each A_i is being supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is being supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nk         [rocblas_int]\nthe number of super-diagonals of each matrix A_i. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array pointing to the first matrix A_1. Each A_i is of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe leading (k + 1) by n part of each A_i must contain the upper\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (k + 1), the first super-diagonal on the RHS\nof row k, etc.\nThe top left k by x triangle of each A_i will not be referenced.\nEx (upper, lda = n = 4, k = 1):\nA                             Represented matrix\n(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)\n(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)\n\nif uplo == rocblas_fill_lower:\nThe leading (k + 1) by n part of each A_i must contain the lower\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (1), the first sub-diagonal on the LHS of\nrow 2, etc.\nThe bottom right k by k triangle of each A_i will not be referenced.\nEx (lower, lda = 2, n = 4, k = 1):\nA                               Represented 
matrix\n(1,0) (2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)\n(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)\n(0, 0) (6, 8) (3, 0) (7,-7)\n(0, 0) (0, 0) (7, 7) (4, 0)\n\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. must be >= max(1, n).\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx         device array pointing to the first vector y_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array pointing to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhbmv_strided_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: The upper triangular part of each A_i is being supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is being supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nk         [rocblas_int]\nthe number of super-diagonals of each matrix A_i. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array pointing to the first matrix A_1. Each A_i is of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe leading (k + 1) by n part of each A_i must contain the upper\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (k + 1), the first super-diagonal on the RHS\nof row k, etc.\nThe top left k by x triangle of each A_i will not be referenced.\nEx (upper, lda = n = 4, k = 1):\nA                             Represented matrix\n(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)\n(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)\n\nif uplo == rocblas_fill_lower:\nThe leading (k + 1) by n part of each A_i must contain the lower\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (1), the first sub-diagonal on the LHS of\nrow 2, etc.\nThe bottom right k by k triangle of each A_i will not be referenced.\nEx (lower, lda = 2, n = 4, k = 1):\nA                               Represented 
matrix\n(1,0) (2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)\n(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)\n(0, 0) (6, 8) (3, 0) (7,-7)\n(0, 0) (0, 0) (7, 7) (4, 0)\n\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. must be >= max(1, n).\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx         device array pointing to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device array pointing to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_chbmv_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3307,7 +5565,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhemv performs one of the matrix-vector operations:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and A is an\nn by n Hermitian matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.\n@param[in]\nn         [rocblas_int]\nthe order of the matrix A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device pointer storing matrix A. Of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular part of A must contain\nthe upper triangular part of a Hermitian matrix. The lower\ntriangular part of A will not be referenced.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular part of A must contain\nthe lower triangular part of a Hermitian matrix. The upper\ntriangular part of A will not be referenced.\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof A will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. must be >= max(1, n).\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhemv performs one of the matrix-vector operations:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and A is an\nn by n Hermitian matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.\n@param[in]\nn         [rocblas_int]\nthe order of the matrix A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device pointer storing matrix A. Of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular part of A must contain\nthe upper triangular part of a Hermitian matrix. The lower\ntriangular part of A will not be referenced.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular part of A must contain\nthe lower triangular part of a Hermitian matrix. The upper\ntriangular part of A will not be referenced.\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof A will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. must be >= max(1, n).\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
     pub fn rocblas_chemv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3340,7 +5598,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhemv_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular part of each A_i must contain\nthe upper triangular part of a Hermitian matrix. The lower\ntriangular part of each A_i will not be referenced.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular part of each A_i must contain\nthe lower triangular part of a Hermitian matrix. The upper\ntriangular part of each A_i will not be referenced.\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. must be >= max(1, n).\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhemv_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular part of each A_i must contain\nthe upper triangular part of a Hermitian matrix. The lower\ntriangular part of each A_i will not be referenced.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular part of each A_i must contain\nthe lower triangular part of a Hermitian matrix. The upper\ntriangular part of each A_i will not be referenced.\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. must be >= max(1, n).\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_chemv_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3375,7 +5633,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhemv_strided_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular part of each A_i must contain\nthe upper triangular part of a Hermitian matrix. The lower\ntriangular part of each A_i will not be referenced.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular part of each A_i must contain\nthe lower triangular part of a Hermitian matrix. The upper\ntriangular part of each A_i will not be referenced.\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. 
must be >= max(1, n).\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) to the next (A_i+1).\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhemv_strided_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular part of each A_i must contain\nthe upper triangular part of a Hermitian matrix. The lower\ntriangular part of each A_i will not be referenced.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular part of each A_i must contain\nthe lower triangular part of a Hermitian matrix. The upper\ntriangular part of each A_i will not be referenced.\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. 
must be >= max(1, n).\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) to the next (A_i+1).\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_chemv_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3416,7 +5674,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher performs the matrix-vector operations:\n\nA := A + alpha*x*x**H\nwhere alpha is a real scalar, x is a vector, and A is an\nn by n Hermitian matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in A.\n- rocblas_fill_lower: The lower triangular part of A is supplied in A.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nA         device pointer storing the specified triangular portion of the Hermitian matrix A. Of size (lda * n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe lower triangluar portion will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe upper triangular portion will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. Must be at least max(1, n)."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher performs the matrix-vector operations:\n\nA := A + alpha*x*x**H\nwhere alpha is a real scalar, x is a vector, and A is an\nn by n Hermitian matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in A.\n- rocblas_fill_lower: The lower triangular part of A is supplied in A.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in, out]\nA         device pointer storing the specified triangular portion of the Hermitian matrix A. Of size (lda * n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe lower triangular portion will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe upper triangular portion will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. Must be at least max(1, n)."]
     pub fn rocblas_cher(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3443,7 +5701,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in A.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in A.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\nA         device array of device pointers storing the specified triangular portion of\neach Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. Must be at least max(1, n).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n Hermitian matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in A.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in A.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in, out]\nA         device array of device pointers storing the specified triangular portion of\neach Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. Must be at least max(1, n).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_cher_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3472,7 +5730,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n Hermitian matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in A.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in A.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[inout]\nA         device array of device pointers storing the specified triangular portion of\neach Hermitian matrix A_i. Points to the first matrix (A_1).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n Hermitian matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in A.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in A.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in, out]\nA         device array of device pointers storing the specified triangular portion of\neach Hermitian matrix A_i. Points to the first matrix (A_1).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_cher_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3505,7 +5763,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher2 performs the matrix-vector operations:\n\nA := A + alpha*x*y**H + conj(alpha)*y*x**H\nwhere alpha is a complex scalar, x and y are vectors, and A is an\nn by n Hermitian matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied.\n- rocblas_fill_lower: The lower triangular part of A is supplied.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nA         device pointer storing the specified triangular portion of\nthe Hermitian matrix A. Of size (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe lower triangular portion of A will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe upper triangular portion of A will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. Must be at least max(lda, 1)."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher2 performs the matrix-vector operations:\n\nA := A + alpha*x*y**H + conj(alpha)*y*x**H\nwhere alpha is a complex scalar, x and y are vectors, and A is an\nn by n Hermitian matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied.\n- rocblas_fill_lower: The lower triangular part of A is supplied.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in, out]\nA         device pointer storing the specified triangular portion of\nthe Hermitian matrix A. Of size (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe lower triangular portion of A will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe upper triangular portion of A will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. Must be at least max(1, n)."]
     pub fn rocblas_cher2(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3536,7 +5794,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher2_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n Hermitian matrix for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[inout]\nA         device array of device pointers storing the specified triangular portion of\neach Hermitian matrix A_i of size (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. Must be at least max(lda, 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher2_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n Hermitian matrix for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in, out]\nA         device array of device pointers storing the specified triangular portion of\neach Hermitian matrix A_i of size (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. Must be at least max(lda, 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_cher2_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3569,7 +5827,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher2_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n Hermitian matrix for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nspecifies the stride between the beginning of one vector (x_i) and the next (x_i+1).\n@param[in]\ny         device pointer pointing to the first vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y  [rocblas_stride]\nspecifies the stride between the beginning of one vector (y_i) and the next (y_i+1).\n@param[inout]\nA         device pointer pointing to the first matrix (A_1). 
Stores the specified triangular portion of\neach Hermitian matrix A_i.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. Must be at least max(lda, 1).\n@param[in]\nstride_A  [rocblas_stride]\nspecifies the stride between the beginning of one matrix (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher2_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n Hermitian matrix for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nspecifies the stride between the beginning of one vector (x_i) and the next (x_i+1).\n@param[in]\ny         device pointer pointing to the first vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y  [rocblas_stride]\nspecifies the stride between the beginning of one vector (y_i) and the next (y_i+1).\n@param[in, out]\nA         device pointer pointing to the first matrix (A_1). 
Stores the specified triangular portion of\neach Hermitian matrix A_i.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. Must be at least max(lda, 1).\n@param[in]\nstride_A  [rocblas_stride]\nspecifies the stride between the beginning of one matrix (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_cher2_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3608,7 +5866,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpmv performs the matrix-vector operation:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and A is an\nn by n Hermitian matrix, supplied in packed form (see description below).\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied in AP.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe order of the matrix A. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]\n(3,-2) (5, 1) (6, 0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]\n(3,-2) (5, 1) (6, 0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host 
pointer to scalar beta.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpmv performs the matrix-vector operation:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and A is an\nn by n Hermitian matrix, supplied in packed form (see description below).\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied in AP.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe order of the matrix A. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]\n(3,-2) (5, 1) (6, 0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]\n(3,-2) (5, 1) (6, 0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host 
pointer to scalar beta.\n@param[in, out]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
     pub fn rocblas_chpmv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3639,7 +5897,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpmv_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, supplied in packed form (see description below),\nfor each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of each Hermitian matrix A_i is supplied in AP.\n- rocblas_fill_lower: the lower triangular part of each Hermitian matrix A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nAP      device pointer of device pointers storing the packed version of the specified triangular\nportion of each Hermitian matrix A_i. Each A_i is of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]\n(3,-2) (5, 1) (6, 0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]\n(3,-2) (5, 1) (6, 0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nx         device array of device pointers storing each vector 
x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpmv_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, supplied in packed form (see description below),\nfor each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of each Hermitian matrix A_i is supplied in AP.\n- rocblas_fill_lower: the lower triangular part of each Hermitian matrix A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nAP      device pointer of device pointers storing the packed version of the specified triangular\nportion of each Hermitian matrix A_i. Each A_i is of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]\n(3,-2) (5, 1) (6, 0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]\n(3,-2) (5, 1) (6, 0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nx         device array of device pointers storing each vector 
x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_chpmv_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3672,7 +5930,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpmv_strided_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, supplied in packed form (see description below),\nfor each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of each Hermitian matrix A_i is supplied in AP.\n- rocblas_fill_lower: the lower triangular part of each Hermitian matrix A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nAP        device pointer pointing to the beginning of the first matrix (AP_1). Stores the packed\nversion of the specified triangular portion of each Hermitian matrix AP_i of size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]\n(3,-2) (5, 1) (6, 0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]\n(3,-2) (5, 1) (6, 0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nstride_A  [rocblas_stride]\nstride from 
the start of one matrix (AP_i) and the next one (AP_i+1).\n@param[in]\nx         device array pointing to the beginning of the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array pointing to the beginning of the first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpmv_strided_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, supplied in packed form (see description below),\nfor each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of each Hermitian matrix A_i is supplied in AP.\n- rocblas_fill_lower: the lower triangular part of each Hermitian matrix A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nAP        device pointer pointing to the beginning of the first matrix (AP_1). Stores the packed\nversion of the specified triangular portion of each Hermitian matrix AP_i of size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]\n(3,-2) (5, 1) (6, 0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]\n(3,-2) (5, 1) (6, 0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nstride_A  [rocblas_stride]\nstride from 
the start of one matrix (AP_i) and the next one (AP_i+1).\n@param[in]\nx         device array pointing to the beginning of the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[in, out]\ny         device array pointing to the beginning of the first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_chpmv_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3711,7 +5969,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr performs the matrix-vector operations:\n\nA := A + alpha*x*x**H\nwhere alpha is a real scalar, x is a vector, and A is an\nn by n Hermitian matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr performs the matrix-vector operations:\n\nA := A + alpha*x*x**H\nwhere alpha is a real scalar, x is a vector, and A is an\nn by n Hermitian matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in, out]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0."]
     pub fn rocblas_chpr(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3736,7 +5994,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in, out]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_chpr_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3763,7 +6021,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[inout]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i. 
Points to the first matrix (A_1).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in, out]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i. 
Points to the first matrix (A_1).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_chpr_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3794,7 +6052,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr2 performs the matrix-vector operations:\n\nA := A + alpha*x*y**H + conj(alpha)*y*x**H\nwhere alpha is a complex scalar, x and y are vectors, and A is an\nn by n Hermitian matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe Hermitian matrix A. 
Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr2 performs the matrix-vector operations:\n\nA := A + alpha*x*y**H + conj(alpha)*y*x**H\nwhere alpha is a complex scalar, x and y are vectors, and A is an\nn by n Hermitian matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in, out]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe Hermitian matrix A. 
Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0."]
     pub fn rocblas_chpr2(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3823,7 +6081,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr2_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[inout]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) --> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr2_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in, out]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) --> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_chpr2_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3854,7 +6112,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr2_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\ny         device pointer pointing to the first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[inout]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i. 
Points to the first matrix (A_1).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr2_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\ny         device pointer pointing to the first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in, out]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i. 
Points to the first matrix (A_1).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_chpr2_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -3891,13 +6149,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrmv performs one of the matrix-vector operations:\n\nx = A*x or\nx = A**T*x,\nwhere x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix.\nThe vector x is overwritten.\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of A. m >= 0.\n\n@param[in]\nA         device pointer storing matrix A,\nof dimension ( lda, m ).\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\nlda = max( 1, m ).\n\n@param[in]\nx         device pointer storing vector x.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrmv performs one of the matrix-vector operations:\n\nx = A*x or\nx = A**T*x or\nx = A**H*x\nwhere x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix.\nThe vector x is overwritten.\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:   op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of A. n >= 0.\n\n@param[in]\nA         device pointer storing matrix A, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A, otherwise the lower triangular part of the leading n-by-n array contains the matrix A.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. lda must be at least max( 1, n ).\n\n@param[in, out]\nx         device pointer storing vector x. On exit, x is overwritten with the transformed vector x.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
     pub fn rocblas_strmv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f32,
         lda: rocblas_int,
         x: *mut f32,
@@ -3911,7 +6169,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f64,
         lda: rocblas_int,
         x: *mut f64,
@@ -3925,7 +6183,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_float_complex,
         lda: rocblas_int,
         x: *mut rocblas_float_complex,
@@ -3939,7 +6197,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_double_complex,
         lda: rocblas_int,
         x: *mut rocblas_double_complex,
@@ -3948,13 +6206,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrmv_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A**T*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of matrices A_i. m >= 0.\n\n@param[in]\nA         device pointer storing pointer of matrices A_i,\nof dimension ( lda, m )\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A_i.\nlda >= max( 1, m ).\n\n@param[in]\nx         device pointer storing vectors x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of vectors x_i.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrmv_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A_i**T*x_i or\nx_i = A_i**H*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:   op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of matrices A_i. n >= 0.\n\n@param[in]\nA         device pointer to an array of device pointers to the A_i matrices, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A_i, otherwise the lower triangular part of the leading n-by-n array contains the matrix A_i.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A_i. lda must be at least max( 1, n ).\n\n@param[in, out]\nx         device pointer to an array of device pointers to the x_i vectors. On exit, each x_i is overwritten with the transformed vector x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of vectors x_i.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
     pub fn rocblas_strmv_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const f32,
         lda: rocblas_int,
         x: *const *mut f32,
@@ -3969,7 +6227,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const f64,
         lda: rocblas_int,
         x: *const *mut f64,
@@ -3984,7 +6242,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const rocblas_float_complex,
         lda: rocblas_int,
         x: *const *mut rocblas_float_complex,
@@ -3999,7 +6257,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const rocblas_double_complex,
         lda: rocblas_int,
         x: *const *mut rocblas_double_complex,
@@ -4009,13 +6267,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrmv_strided_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A**T*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nwith strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).\n\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of matrices A_i. m >= 0.\n\n@param[in]\nA         device pointer of the matrix A_0,\nof dimension ( lda, m ).\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A_i.\nlda >= max( 1, m ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_{i + 1}.\n\n@param[in]\nx         device pointer storing the vector x_0.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of one vector x.\n\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one x_i vector to the next x_{i + 1}.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrmv_strided_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A_i**T*x_i, or\nx_i = A_i**H*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nwith strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).\n\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:   op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of matrices A_i. n >= 0.\n\n@param[in]\nA         device pointer to the matrix A_1 of the batch, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A_i, otherwise the lower triangular part of the leading n-by-n array contains the matrix A_i.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A_i. lda must be at least max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_{i + 1}.\n\n@param[in, out]\nx         device pointer to the vector x_1 of the batch. 
On exit, each x_i is overwritten with the transformed vector x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of one vector x.\n\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one x_i vector to the next x_{i + 1}.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
     pub fn rocblas_strmv_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f32,
         lda: rocblas_int,
         stride_A: rocblas_stride,
@@ -4032,7 +6290,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f64,
         lda: rocblas_int,
         stride_A: rocblas_stride,
@@ -4049,7 +6307,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_float_complex,
         lda: rocblas_int,
         stride_A: rocblas_stride,
@@ -4066,7 +6324,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_double_complex,
         lda: rocblas_int,
         stride_A: rocblas_stride,
@@ -4078,13 +6336,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpmv performs one of the matrix-vector operations:\n\nx = A*x or\nx = A**T*x,\nwhere x is an n element vector and A is an n by n unit, or non-unit,\nupper or lower triangular matrix, supplied in the pack form.\nThe vector x is overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of A. m >= 0.\n\n@param[in]\nA       device pointer storing matrix A,\nof dimension at leat ( m * ( m + 1 ) / 2 ).\n- Before entry with uplo = rocblas_fill_upper, the array A\nmust contain the upper triangular matrix packed sequentially,\ncolumn by column, so that\nA[0] contains a_{0,0}, A[1] and A[2] contain\na_{0,1} and a_{1, 1}, respectively, and so on.\n\n- Before entry with uplo = rocblas_fill_lower, the array A\nmust contain the lower triangular matrix packed sequentially,\ncolumn by column, so that\nA[0] contains a_{0,0}, A[1] and A[2] contain\na_{1,0} and a_{2,0}, respectively, and so on.\n\nNote that when DIAG = rocblas_diagonal_unit, the diagonal elements of A are\nnot referenced, but are assumed to be unity.\n\n@param[in]\nx       device pointer storing vector x.\n\n@param[in]\nincx    [rocblas_int]\nspecifies the increment for the elements of x. incx must not be zero.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpmv performs one of the matrix-vector operations:\n\nx = A*x or\nx = A**T*x or\nx = A**H*x\nwhere x is an n element vector and A is an n by n unit, or non-unit,\nupper or lower triangular matrix, supplied in the packed form.\nThe vector x is overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:   op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows of A. n >= 0.\n\n@param[in]\nA       device pointer storing matrix A,\nof dimension at least ( n * ( n + 1 ) / 2 ).\n- Before entry with uplo = rocblas_fill_upper, the array A\nmust contain the upper triangular matrix packed sequentially,\ncolumn by column, so that\nA[0] contains a_{0,0}, A[1] and A[2] contain\na_{0,1} and a_{1, 1}, respectively, and so on.\n\n- Before entry with uplo = rocblas_fill_lower, the array A\nmust contain the lower triangular matrix packed sequentially,\ncolumn by column, so that\nA[0] contains a_{0,0}, A[1] and A[2] contain\na_{1,0} and a_{2,0}, respectively, and so on.\n\nNote that when DIAG = rocblas_diagonal_unit, the diagonal elements of A are\nnot referenced, but are assumed to be unity.\n\n@param[in, out]\nx      device pointer storing vector x. On exit, x is overwritten with the transformed vector x.\n\n@param[in]\nincx    [rocblas_int]\nspecifies the increment for the elements of x. incx must not be zero.\n"]
     pub fn rocblas_stpmv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f32,
         x: *mut f32,
         incx: rocblas_int,
@@ -4097,7 +6355,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f64,
         x: *mut f64,
         incx: rocblas_int,
@@ -4110,7 +6368,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_float_complex,
         x: *mut rocblas_float_complex,
         incx: rocblas_int,
@@ -4123,7 +6381,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_double_complex,
         x: *mut rocblas_double_complex,
         incx: rocblas_int,
@@ -4131,13 +6389,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpmv_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A**T*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of matrices A_i. m >= 0.\n\n@param[in]\nA         device pointer storing pointer of matrices A_i,\nof dimension ( lda, m ).\n\n@param[in]\nx         device pointer storing vectors x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of vectors x_i.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpmv_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A_i**T*x_i or\nx_i = A_i**H*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:   op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of matrices A_i. n >= 0.\n\n@param[in]\nA         device pointer to an array of device pointers to the A_i matrices, each supplied in packed form with at least ( n * ( n + 1 ) / 2 ) elements. If uplo == rocblas_fill_upper, each array holds the upper triangular part of A_i packed column by column, otherwise it holds the lower triangular part of A_i packed column by column.\n\n@param[in, out]\nx         device pointer to an array of device pointers to the x_i vectors. On exit, each x_i is overwritten with the transformed vector x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of vectors x_i.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
     pub fn rocblas_stpmv_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const f32,
         x: *const *mut f32,
         incx: rocblas_int,
@@ -4151,7 +6409,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const f64,
         x: *const *mut f64,
         incx: rocblas_int,
@@ -4165,7 +6423,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const rocblas_float_complex,
         x: *const *mut rocblas_float_complex,
         incx: rocblas_int,
@@ -4179,7 +6437,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const rocblas_double_complex,
         x: *const *mut rocblas_double_complex,
         incx: rocblas_int,
@@ -4188,13 +6446,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpmv_strided_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A**T*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nwith strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of matrices A_i. m >= 0.\n\n@param[in]\nA         device pointer of the matrix A_0,\nof dimension ( lda, m )\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_{i + 1}.\n\n@param[in]\nx         device pointer storing the vector x_0.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of one vector x.\n\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one x_i vector to the next x_{i + 1}.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpmv_strided_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A_i**T*x_i or\nx_i = A_i**H*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nwith strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_{i-1}$).\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:   op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of matrices A_i. n >= 0.\n\n@param[in]\nA       device pointer to the matrix A_1 of the batch, supplied in packed form with at least ( n * ( n + 1 ) / 2 ) elements. If uplo == rocblas_fill_upper, each A_i holds the upper triangular part packed column by column, otherwise each A_i holds the lower triangular part packed column by column.\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_{i + 1}.\n\n@param[in, out]\nx       device pointer to the vector x_1 of the batch. On exit, each x_i is overwritten with the transformed vector x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of one vector x.\n\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one x_i vector to the next x_{i + 1}.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
     pub fn rocblas_stpmv_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f32,
         stride_A: rocblas_stride,
         x: *mut f32,
@@ -4210,7 +6468,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f64,
         stride_A: rocblas_stride,
         x: *mut f64,
@@ -4226,7 +6484,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_float_complex,
         stride_A: rocblas_stride,
         x: *mut rocblas_float_complex,
@@ -4242,7 +6500,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_double_complex,
         stride_A: rocblas_stride,
         x: *mut rocblas_double_complex,
@@ -4253,7 +6511,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbmv performs one of the matrix-vector operations:\n\nx := A*x      or\nx := A**T*x   or\nx := A**H*x,\nx is a vectors and A is a banded m by m matrix (see description below).\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: A is an upper banded triangular matrix.\n- rocblas_fill_lower: A is a  lower banded triangular matrix.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\ndiag      [rocblas_diagonal]\n- rocblas_diagonal_unit: The main diagonal of A is assumed to consist of only\n1's and is not referenced.\n- rocblas_diagonal_non_unit: No assumptions are made of A's main diagonal.\n@param[in]\nm         [rocblas_int]\nthe number of rows and columns of the matrix represented by A.\n@param[in]\nk         [rocblas_int]\n\nif uplo == rocblas_fill_upper, k specifies the number of super-diagonals\nof the matrix A.\n\nif uplo == rocblas_fill_lower, k specifies the number of sub-diagonals\nof the matrix A.\nk must satisfy k > 0 && k < lda.\n@param[in]\nA         device pointer storing banded triangular matrix A.\n\nif uplo == rocblas_fill_upper:\nThe matrix represented is an upper banded triangular matrix\nwith the main diagonal and k super-diagonals, everything\nelse can be assumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the k'th\nrow, the first super diagonal resides on the RHS of the k-1'th row, etc,\nwith the k'th diagonal on the RHS of the 0'th row.\nEx: (rocblas_fill_upper; m = 5; k = 2)\n1 6 9 0 0              0 0 9 8 7\n0 2 7 8 0              0 6 7 8 9\n0 0 3 8 7     ---->    1 2 3 4 5\n0 0 0 4 9              0 0 0 0 0\n0 0 0 0 5              0 0 0 0 0\n\nif uplo == rocblas_fill_lower:\nThe matrix represnted is a lower banded triangular matrix\nwith the main diagonal and k sub-diagonals, everything 
else can be\nassumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the 0'th row,\nworking up to the k'th diagonal residing on the LHS of the k'th row.\nEx: (rocblas_fill_lower; m = 5; k = 2)\n1 0 0 0 0              1 2 3 4 5\n6 2 0 0 0              6 7 8 9 0\n9 7 3 0 0     ---->    9 8 7 0 0\n0 8 8 4 0              0 0 0 0 0\n0 0 7 9 5              0 0 0 0 0\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. lda must satisfy lda > k.\n@param[inout]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbmv performs one of the matrix-vector operations:\n\nx := A*x      or\nx := A**T*x   or\nx := A**H*x,\nx is a vectors and A is a banded m by m matrix (see description below).\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: A is an upper banded triangular matrix.\n- rocblas_fill_lower: A is a  lower banded triangular matrix.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\ndiag      [rocblas_diagonal]\n- rocblas_diagonal_unit: The main diagonal of A is assumed to consist of only\n1's and is not referenced.\n- rocblas_diagonal_non_unit: No assumptions are made of A's main diagonal.\n@param[in]\nm         [rocblas_int]\nthe number of rows and columns of the matrix represented by A.\n@param[in]\nk         [rocblas_int]\n\nif uplo == rocblas_fill_upper, k specifies the number of super-diagonals\nof the matrix A.\n\nif uplo == rocblas_fill_lower, k specifies the number of sub-diagonals\nof the matrix A.\nk must satisfy k > 0 && k < lda.\n@param[in]\nA         device pointer storing banded triangular matrix A.\n\nif uplo == rocblas_fill_upper:\nThe matrix represented is an upper banded triangular matrix\nwith the main diagonal and k super-diagonals, everything\nelse can be assumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the k'th\nrow, the first super diagonal resides on the RHS of the k-1'th row, etc,\nwith the k'th diagonal on the RHS of the 0'th row.\nEx: (rocblas_fill_upper; m = 5; k = 2)\n1 6 9 0 0              0 0 9 8 7\n0 2 7 8 0              0 6 7 8 9\n0 0 3 8 7     ---->    1 2 3 4 5\n0 0 0 4 9              0 0 0 0 0\n0 0 0 0 5              0 0 0 0 0\n\nif uplo == rocblas_fill_lower:\nThe matrix represnted is a lower banded triangular matrix\nwith the main diagonal and k sub-diagonals, everything 
else can be\nassumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the 0'th row,\nworking up to the k'th diagonal residing on the LHS of the k'th row.\nEx: (rocblas_fill_lower; m = 5; k = 2)\n1 0 0 0 0              1 2 3 4 5\n6 2 0 0 0              6 7 8 9 0\n9 7 3 0 0     ---->    9 8 7 0 0\n0 8 8 4 0              0 0 0 0 0\n0 0 7 9 5              0 0 0 0 0\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. lda must satisfy lda > k.\n@param[in, out]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
     pub fn rocblas_stbmv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -4314,13 +6572,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbmv_batched performs one of the matrix-vector operations:\n\nx_i := A_i*x_i      or\nx_i := A_i**T*x_i   or\nx_i := A_i**H*x_i,\nwhere (A_i, x_i) is the i-th instance of the batch.\nx_i is a vector and A_i is an m by m matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: each A_i is an upper banded triangular matrix.\n- rocblas_fill_lower: each A_i is a  lower banded triangular matrix.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether each matrix A_i is tranposed (conjugated) or not.\n@param[in]\ndiag      [rocblas_diagonal]\n- rocblas_diagonal_unit: The main diagonal of each A_i is assumed to consist of only\n1's and is not referenced.\n- rocblas_diagonal_non_unit: No assumptions are made of each A_i's main diagonal.\n@param[in]\nm         [rocblas_int]\nthe number of rows and columns of the matrix represented by each A_i.\n@param[in]\nk         [rocblas_int]\n\nif uplo == rocblas_fill_upper, k specifies the number of super-diagonals\nof each matrix A_i.\n\nif uplo == rocblas_fill_lower, k specifies the number of sub-diagonals\nof each matrix A_i.\nk must satisfy k > 0 && k < lda.\n@param[in]\nA         device array of device pointers storing each banded triangular matrix A_i.\n\nif uplo == rocblas_fill_upper:\nThe matrix represented is an upper banded triangular matrix\nwith the main diagonal and k super-diagonals, everything\nelse can be assumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the k'th\nrow, the first super diagonal resides on the RHS of the k-1'th row, etc,\nwith the k'th diagonal on the RHS of the 0'th row.\nEx: (rocblas_fill_upper; m = 5; k = 2)\n1 6 9 0 0              0 0 9 8 7\n0 2 7 8 0              0 6 7 8 9\n0 0 3 8 7     ---->    1 2 3 4 5\n0 0 0 4 9              0 0 0 0 0\n0 0 0 0 5              0 0 0 0 
0\n\nif uplo == rocblas_fill_lower:\nThe matrix represnted is a lower banded triangular matrix\nwith the main diagonal and k sub-diagonals, everything else can be\nassumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the 0'th row,\nworking up to the k'th diagonal residing on the LHS of the k'th row.\nEx: (rocblas_fill_lower; m = 5; k = 2)\n1 0 0 0 0              1 2 3 4 5\n6 2 0 0 0              6 7 8 9 0\n9 7 3 0 0     ---->    9 8 7 0 0\n0 8 8 4 0              0 0 0 0 0\n0 0 7 9 5              0 0 0 0 0\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. lda must satisfy lda > k.\n@param[inout]\nx         device array of device pointer storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbmv_batched performs one of the matrix-vector operations:\n\nx_i := A_i*x_i      or\nx_i := A_i**T*x_i   or\nx_i := A_i**H*x_i,\nwhere (A_i, x_i) is the i-th instance of the batch.\nx_i is a vector and A_i is an n by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: each A_i is an upper banded triangular matrix.\n- rocblas_fill_lower: each A_i is a  lower banded triangular matrix.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether each matrix A_i is tranposed (conjugated) or not.\n@param[in]\ndiag      [rocblas_diagonal]\n- rocblas_diagonal_unit: The main diagonal of each A_i is assumed to consist of only\n1's and is not referenced.\n- rocblas_diagonal_non_unit: No assumptions are made of each A_i's main diagonal.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of the matrix represented by each A_i.\n@param[in]\nk         [rocblas_int]\n\nif uplo == rocblas_fill_upper, k specifies the number of super-diagonals\nof each matrix A_i.\n\nif uplo == rocblas_fill_lower, k specifies the number of sub-diagonals\nof each matrix A_i.\nk must satisfy k > 0 && k < lda.\n@param[in]\nA         device array of device pointers storing each banded triangular matrix A_i.\n\nif uplo == rocblas_fill_upper:\nThe matrix represented is an upper banded triangular matrix\nwith the main diagonal and k super-diagonals, everything\nelse can be assumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the k'th\nrow, the first super diagonal resides on the RHS of the k-1'th row, etc,\nwith the k'th diagonal on the RHS of the 0'th row.\nEx: (rocblas_fill_upper; n = 5; k = 2)\n1 6 9 0 0              0 0 9 8 7\n0 2 7 8 0              0 6 7 8 9\n0 0 3 8 7     ---->    1 2 3 4 5\n0 0 0 4 9              0 0 0 0 0\n0 0 0 0 5              0 0 0 0 
0\n\nif uplo == rocblas_fill_lower:\nThe matrix represnted is a lower banded triangular matrix\nwith the main diagonal and k sub-diagonals, everything else can be\nassumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the 0'th row,\nworking up to the k'th diagonal residing on the LHS of the k'th row.\nEx: (rocblas_fill_lower; n = 5; k = 2)\n1 0 0 0 0              1 2 3 4 5\n6 2 0 0 0              6 7 8 9 0\n9 7 3 0 0     ---->    9 8 7 0 0\n0 8 8 4 0              0 0 0 0 0\n0 0 7 9 5              0 0 0 0 0\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. lda must satisfy lda > k.\n@param[in, out]\nx         device array of device pointer storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_stbmv_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         trans: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         k: rocblas_int,
         A: *const *const f32,
         lda: rocblas_int,
@@ -4336,7 +6594,7 @@ extern "C" {
         uplo: rocblas_fill,
         trans: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         k: rocblas_int,
         A: *const *const f64,
         lda: rocblas_int,
@@ -4352,7 +6610,7 @@ extern "C" {
         uplo: rocblas_fill,
         trans: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         k: rocblas_int,
         A: *const *const rocblas_float_complex,
         lda: rocblas_int,
@@ -4368,7 +6626,7 @@ extern "C" {
         uplo: rocblas_fill,
         trans: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         k: rocblas_int,
         A: *const *const rocblas_double_complex,
         lda: rocblas_int,
@@ -4379,13 +6637,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbmv_strided_batched performs one of the matrix-vector operations:\n\nx_i := A_i*x_i      or\nx_i := A_i**T*x_i   or\nx_i := A_i**H*x_i,\nwhere (A_i, x_i) is the i-th instance of the batch.\nx_i is a vector and A_i is an m by m matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: each A_i is an upper banded triangular matrix.\n- rocblas_fill_lower: each A_i is a  lower banded triangular matrix.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether each matrix A_i is tranposed (conjugated) or not.\n@param[in]\ndiag      [rocblas_diagonal]\n- rocblas_diagonal_unit: The main diagonal of each A_i is assumed to consist of only\n1's and is not referenced.\n- rocblas_diagonal_non_unit: No assumptions are made of each A_i's main diagonal.\n@param[in]\nm         [rocblas_int]\nthe number of rows and columns of the matrix represented by each A_i.\n@param[in]\nk         [rocblas_int]\n\nif uplo == rocblas_fill_upper, k specifies the number of super-diagonals\nof each matrix A_i.\n\nif uplo == rocblas_fill_lower, k specifies the number of sub-diagonals\nof each matrix A_i.\nk must satisfy k > 0 && k < lda.\n@param[in]\nA         device array to the first matrix A_i of the batch. 
Stores each banded triangular matrix A_i.\n\nif uplo == rocblas_fill_upper:\nThe matrix represented is an upper banded triangular matrix\nwith the main diagonal and k super-diagonals, everything\nelse can be assumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the k'th\nrow, the first super diagonal resides on the RHS of the k-1'th row, etc,\nwith the k'th diagonal on the RHS of the 0'th row.\nEx: (rocblas_fill_upper; m = 5; k = 2)\n1 6 9 0 0              0 0 9 8 7\n0 2 7 8 0              0 6 7 8 9\n0 0 3 8 7     ---->    1 2 3 4 5\n0 0 0 4 9              0 0 0 0 0\n0 0 0 0 5              0 0 0 0 0\n\nif uplo == rocblas_fill_lower:\nThe matrix represnted is a lower banded triangular matrix\nwith the main diagonal and k sub-diagonals, everything else can be\nassumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the 0'th row,\nworking up to the k'th diagonal residing on the LHS of the k'th row.\nEx: (rocblas_fill_lower; m = 5; k = 2)\n1 0 0 0 0              1 2 3 4 5\n6 2 0 0 0              6 7 8 9 0\n9 7 3 0 0     ---->    9 8 7 0 0\n0 8 8 4 0              0 0 0 0 0\n0 0 7 9 5              0 0 0 0 0\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. lda must satisfy lda > k.\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_(i + 1).\n@param[inout]\nx         device array to the first vector x_i of the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one x_i matrix to the next x_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbmv_strided_batched performs one of the matrix-vector operations:\n\nx_i := A_i*x_i      or\nx_i := A_i**T*x_i   or\nx_i := A_i**H*x_i,\nwhere (A_i, x_i) is the i-th instance of the batch.\nx_i is a vector and A_i is an n by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: each A_i is an upper banded triangular matrix.\n- rocblas_fill_lower: each A_i is a  lower banded triangular matrix.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether each matrix A_i is tranposed (conjugated) or not.\n@param[in]\ndiag      [rocblas_diagonal]\n- rocblas_diagonal_unit: The main diagonal of each A_i is assumed to consist of only\n1's and is not referenced.\n- rocblas_diagonal_non_unit: No assumptions are made of each A_i's main diagonal.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of the matrix represented by each A_i.\n@param[in]\nk         [rocblas_int]\n\nif uplo == rocblas_fill_upper, k specifies the number of super-diagonals\nof each matrix A_i.\n\nif uplo == rocblas_fill_lower, k specifies the number of sub-diagonals\nof each matrix A_i.\nk must satisfy k > 0 && k < lda.\n@param[in]\nA         device array to the first matrix A_i of the batch. 
Stores each banded triangular matrix A_i.\n\nif uplo == rocblas_fill_upper:\nThe matrix represented is an upper banded triangular matrix\nwith the main diagonal and k super-diagonals, everything\nelse can be assumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the k'th\nrow, the first super diagonal resides on the RHS of the k-1'th row, etc,\nwith the k'th diagonal on the RHS of the 0'th row.\nEx: (rocblas_fill_upper; n = 5; k = 2)\n1 6 9 0 0              0 0 9 8 7\n0 2 7 8 0              0 6 7 8 9\n0 0 3 8 7     ---->    1 2 3 4 5\n0 0 0 4 9              0 0 0 0 0\n0 0 0 0 5              0 0 0 0 0\n\nif uplo == rocblas_fill_lower:\nThe matrix represented is a lower banded triangular matrix\nwith the main diagonal and k sub-diagonals, everything else can be\nassumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the 0'th row,\nworking up to the k'th diagonal residing on the LHS of the k'th row.\nEx: (rocblas_fill_lower; n = 5; k = 2)\n1 0 0 0 0              1 2 3 4 5\n6 2 0 0 0              6 7 8 9 0\n9 7 3 0 0     ---->    9 8 7 0 0\n0 8 8 4 0              0 0 0 0 0\n0 0 7 9 5              0 0 0 0 0\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. lda must satisfy lda > k.\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_(i + 1).\n@param[in, out]\nx         device array to the first vector x_i of the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one x_i vector to the next x_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_stbmv_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         trans: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         k: rocblas_int,
         A: *const f32,
         lda: rocblas_int,
@@ -4403,7 +6661,7 @@ extern "C" {
         uplo: rocblas_fill,
         trans: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         k: rocblas_int,
         A: *const f64,
         lda: rocblas_int,
@@ -4421,7 +6679,7 @@ extern "C" {
         uplo: rocblas_fill,
         trans: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         k: rocblas_int,
         A: *const rocblas_float_complex,
         lda: rocblas_int,
@@ -4439,7 +6697,7 @@ extern "C" {
         uplo: rocblas_fill,
         trans: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         k: rocblas_int,
         A: *const rocblas_double_complex,
         lda: rocblas_int,
@@ -4452,7 +6710,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbsv solves:\n\nA*x = b or\nA**T*x = b or\nA**H*x = b\nwhere x and b are vectors and A is a banded triangular matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit: A is assumed to be unit triangular (i.e. the diagonal elements\nof A are not used in computations).\n- rocblas_diagonal_non_unit: A is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of b. n >= 0.\n@param[in]\nk         [rocblas_int]\n\nif(uplo == rocblas_fill_upper)\nk specifies the number of super-diagonals of A.\nif(uplo == rocblas_fill_lower)\nk specifies the number of sub-diagonals of A.\nk >= 0.\n\n@param[in]\nA         device pointer storing the matrix A in banded format.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\nlda >= (k + 1).\n\n@param[inout]\nx         device pointer storing input vector b. Overwritten by the output vector x.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbsv solves:\n\nA*x = b or\nA**T*x = b or\nA**H*x = b\nwhere x and b are vectors and A is a banded triangular matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit: A is assumed to be unit triangular (i.e. the diagonal elements\nof A are not used in computations).\n- rocblas_diagonal_non_unit: A is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of b. n >= 0.\n@param[in]\nk         [rocblas_int]\n\nif(uplo == rocblas_fill_upper)\nk specifies the number of super-diagonals of A.\nif(uplo == rocblas_fill_lower)\nk specifies the number of sub-diagonals of A.\nk >= 0.\n\n@param[in]\nA         device pointer storing the matrix A in banded format.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\nlda >= (k + 1).\n\n@param[in, out]\nx         device pointer storing input vector b. Overwritten by the output vector x.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
     pub fn rocblas_stbsv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -4513,7 +6771,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbsv_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a banded triangular matrix,\nfor i = [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none: Solves A_i*x_i = b_i\n- rocblas_operation_transpose: Solves A_i**T*x_i = b_i\n- rocblas_operation_conjugate_transpose: Solves A_i**H*x_i = b_i\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n@param[in]\nk         [rocblas_int]\n\nif(uplo == rocblas_fill_upper)\nk specifies the number of super-diagonals of each A_i.\nif(uplo == rocblas_fill_lower)\nk specifies the number of sub-diagonals of each A_i.\nk >= 0.\n\n@param[in]\nA         device vector of device pointers storing each matrix A_i in banded format.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\nlda >= (k + 1).\n\n@param[inout]\nx         device vector of device pointers storing each input vector b_i. Overwritten by each output\nvector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbsv_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a banded triangular matrix,\nfor i = [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none: Solves A_i*x_i = b_i\n- rocblas_operation_transpose: Solves A_i**T*x_i = b_i\n- rocblas_operation_conjugate_transpose: Solves A_i**H*x_i = b_i\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n@param[in]\nk         [rocblas_int]\n\nif(uplo == rocblas_fill_upper)\nk specifies the number of super-diagonals of each A_i.\nif(uplo == rocblas_fill_lower)\nk specifies the number of sub-diagonals of each A_i.\nk >= 0.\n\n@param[in]\nA         device vector of device pointers storing each matrix A_i in banded format.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\nlda >= (k + 1).\n\n@param[in, out]\nx         device vector of device pointers storing each input vector b_i. Overwritten by each output\nvector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_stbsv_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -4578,7 +6836,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbsv_strided_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a banded triangular matrix,\nfor i = [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none: Solves A_i*x_i = b_i\n- rocblas_operation_transpose: Solves A_i**T*x_i = b_i\n- rocblas_operation_conjugate_transpose: Solves A_i**H*x_i = b_i\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n@param[in]\nk         [rocblas_int]\n\nif(uplo == rocblas_fill_upper)\nk specifies the number of super-diagonals of each A_i.\nif(uplo == rocblas_fill_lower)\nk specifies the number of sub-diagonals of each A_i.\nk >= 0.\n\n@param[in]\nA         device pointer pointing to the first banded matrix A_1.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\nlda >= (k + 1).\n@param[in]\nstride_A  [rocblas_stride]\nspecifies the distance between the start of one matrix (A_i) and the next (A_i+1).\n\n@param[inout]\nx         device pointer pointing to the first input vector b_1. 
Overwritten by output vectors x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nspecifies the distance between the start of one vector (x_i) and the next (x_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbsv_strided_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a banded triangular matrix,\nfor i = [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none: Solves A_i*x_i = b_i\n- rocblas_operation_transpose: Solves A_i**T*x_i = b_i\n- rocblas_operation_conjugate_transpose: Solves A_i**H*x_i = b_i\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n@param[in]\nk         [rocblas_int]\n\nif(uplo == rocblas_fill_upper)\nk specifies the number of super-diagonals of each A_i.\nif(uplo == rocblas_fill_lower)\nk specifies the number of sub-diagonals of each A_i.\nk >= 0.\n\n@param[in]\nA         device pointer pointing to the first banded matrix A_1.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\nlda >= (k + 1).\n@param[in]\nstride_A  [rocblas_stride]\nspecifies the distance between the start of one matrix (A_i) and the next (A_i+1).\n\n@param[in, out]\nx         device pointer pointing to the first input vector b_1. 
Overwritten by output vectors x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nspecifies the distance between the start of one vector (x_i) and the next (x_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_stbsv_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -4651,13 +6909,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrsv solves:\n\nA*x = b or\nA**T*x = b\nwhere x and b are vectors and A is a triangular matrix.\nThe vector x is overwritten on b.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of b. m >= 0.\n\n@param[in]\nA         device pointer storing matrix A,\nof dimension ( lda, m )\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\nlda = max( 1, m ).\n\n@param[in]\nx         device pointer storing vector x.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrsv solves:\n\nA*x = b or\nA**T*x = b or\nA**H*x = b,\nwhere x and b are vectors and A is a triangular matrix.\nThe vector x is overwritten on b.\n\nAlthough not widespread, some gemm kernels used by trsv may use atomic operations.\nSee Atomic Operations in the API Reference Guide for more information.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:   op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of b. n >= 0.\n\n@param[in]\nA         device pointer storing matrix A, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A, otherwise the lower triangular part of the leading n-by-n array contains the matrix A.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. lda must be at least max( 1, n ).\n\n@param[in, out]\nx         device pointer storing vector x. On exit, x is overwritten with the transformed vector x.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
     pub fn rocblas_strsv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f32,
         lda: rocblas_int,
         x: *mut f32,
@@ -4671,7 +6929,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f64,
         lda: rocblas_int,
         x: *mut f64,
@@ -4685,7 +6943,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_float_complex,
         lda: rocblas_int,
         x: *mut rocblas_float_complex,
@@ -4699,7 +6957,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_double_complex,
         lda: rocblas_int,
         x: *mut rocblas_double_complex,
@@ -4708,13 +6966,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrsv_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i\nwhere (A_i, x_i, b_i) is the i-th instance of the batch.\nx_i and b_i are vectors and A_i is an\nm by m triangular matrix.\n\nThe vector x is overwritten on b.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of b. m >= 0.\n\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\nlda = max(1, m)\n\n@param[in]\nx         device array of device pointers storing each vector x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrsv_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i,\nwhere (A_i, x_i, b_i) is the i-th instance of the batch.\nx_i and b_i are vectors and A_i is an\nn by n triangular matrix.\n\nThe vector x is overwritten on b.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:   op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of b. n >= 0.\n\n@param[in]\nA         device pointer to an array of device pointers to the A_i matrices, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A_i, otherwise the lower triangular part of the leading n-by-n array contains the matrix A_i.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A_i. lda must be at least max( 1, n ).\n\n@param[in, out]\nx         device pointer to an array of device pointers to the x_i vectors. On exit, each x_i is overwritten with the transformed vector x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_strsv_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const f32,
         lda: rocblas_int,
         x: *const *mut f32,
@@ -4729,7 +6987,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const f64,
         lda: rocblas_int,
         x: *const *mut f64,
@@ -4744,7 +7002,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const rocblas_float_complex,
         lda: rocblas_int,
         x: *const *mut rocblas_float_complex,
@@ -4759,7 +7017,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const *const rocblas_double_complex,
         lda: rocblas_int,
         x: *const *mut rocblas_double_complex,
@@ -4769,13 +7027,13 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrsv_strided_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i\nwhere (A_i, x_i, b_i) is the i-th instance of the batch.\nx_i and b_i are vectors and A_i is an m by m triangular matrix, for i = 1, ..., batch_count.\n\nThe vector x is overwritten on b.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of each b_i. m >= 0.\n\n@param[in]\nA         device pointer to the first matrix (A_1) in the batch, of dimension ( lda, m ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_(i + 1).\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\nlda = max( 1, m ).\n\n@param[in, out]\nx         device pointer to the first vector (x_1) in the batch.\n\n@param[in]\nstride_x [rocblas_stride]\nstride from the start of one x_i vector to the next x_(i + 1)\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrsv_strided_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i,\nwhere (A_i, x_i, b_i) is the i-th instance of the batch.\nx_i and b_i are vectors and A_i is an n by n triangular matrix, for i = 1, ..., batch_count.\n\nThe vector x is overwritten on b.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:   op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n\n@param[in]\nA         device pointer to the matrix A_1 of the batch, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A_i, otherwise the lower triangular part of the leading n-by-n array contains the matrix A_i.\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_(i + 1).\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A_i. lda must be at least max( 1, n ).\n\n@param[in, out]\nx         device pointer to the vector x_1 of the batch. On exit, each x_i is overwritten with the transformed vector x_i.\n\n@param[in]\nstride_x [rocblas_stride]\nstride from the start of one x_i vector to the next x_(i + 1)\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_strsv_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f32,
         lda: rocblas_int,
         stride_A: rocblas_stride,
@@ -4792,7 +7050,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const f64,
         lda: rocblas_int,
         stride_A: rocblas_stride,
@@ -4809,7 +7067,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_float_complex,
         lda: rocblas_int,
         stride_A: rocblas_stride,
@@ -4826,7 +7084,7 @@ extern "C" {
         uplo: rocblas_fill,
         transA: rocblas_operation,
         diag: rocblas_diagonal,
-        m: rocblas_int,
+        n: rocblas_int,
         A: *const rocblas_double_complex,
         lda: rocblas_int,
         stride_A: rocblas_stride,
@@ -4838,7 +7096,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpsv solves:\n\nA*x = b or\nA**T*x = b or\nA**H*x = b\nwhere x and b are vectors and A is a triangular matrix stored in the packed format.\n\nThe input vector b is overwritten by the output vector x.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:  A is assumed to be unit triangular (i.e. the diagonal elements\nof A are not used in computations).\n- rocblas_diagonal_non_unit: A is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of b. n >= 0.\n\n@param[in]\nAP        device pointer storing the packed version of matrix A,\nof dimension >= (n * (n + 1) / 2).\n\n@param[inout]\nx         device pointer storing vector b on input, overwritten by x on output.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpsv solves:\n\nA*x = b or\nA**T*x = b or\nA**H*x = b\nwhere x and b are vectors and A is a triangular matrix stored in the packed format.\n\nThe input vector b is overwritten by the output vector x.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:  A is assumed to be unit triangular (i.e. the diagonal elements\nof A are not used in computations).\n- rocblas_diagonal_non_unit: A is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of b. n >= 0.\n\n@param[in]\nAP        device pointer storing the packed version of matrix A,\nof dimension >= (n * (n + 1) / 2).\n\n@param[in, out]\nx         device pointer storing vector b on input, overwritten by x on output.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
     pub fn rocblas_stpsv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -4891,7 +7149,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpsv_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format,\nfor i in [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  each A_i is an upper triangular matrix.\n- rocblas_fill_lower:  each A_i is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit: Each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n\n@param[in]\nAP        device array of device pointers storing the packed versions of each matrix A_i,\nof dimension >= (n * (n + 1) / 2).\n\n@param[inout]\nx         device array of device pointers storing each input vector b_i, overwritten by x_i on output.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpsv_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format,\nfor i in [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  each A_i is an upper triangular matrix.\n- rocblas_fill_lower:  each A_i is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit: Each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n\n@param[in]\nAP        device array of device pointers storing the packed versions of each matrix A_i,\nof dimension >= (n * (n + 1) / 2).\n\n@param[in, out]\nx         device array of device pointers storing each input vector b_i, overwritten by x_i on output.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
     pub fn rocblas_stpsv_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -4948,7 +7206,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpsv_strided_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format,\nfor i in [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  each A_i is an upper triangular matrix.\n- rocblas_fill_lower:  each A_i is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n\n@param[in]\nAP        device pointer pointing to the first packed matrix A_1,\nof dimension >= (n * (n + 1) / 2).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the beginning of one packed matrix (AP_i) and the next (AP_i+1).\n\n@param[inout]\nx         device pointer pointing to the first input vector b_1. Overwritten by each x_i on output.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the beginning of one vector (x_i) and the next (x_i+1).\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpsv_strided_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format,\nfor i in [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  each A_i is an upper triangular matrix.\n- rocblas_fill_lower:  each A_i is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n\n@param[in]\nAP        device pointer pointing to the first packed matrix A_1,\nof dimension >= (n * (n + 1) / 2).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the beginning of one packed matrix (AP_i) and the next (AP_i+1).\n\n@param[in, out]\nx         device pointer pointing to the first input vector b_1. Overwritten by each x_i on output.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the beginning of one vector (x_i) and the next (x_i+1).\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
     pub fn rocblas_stpsv_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5013,7 +7271,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsymv performs the matrix-vector operation:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and\nA should contain an upper or lower triangular n by n symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo     [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced.\n- if rocblas_fill_lower, the upper part of A is not referenced.\n@param[in]\nn         [rocblas_int]\n@param[in]\nalpha\nspecifies the scalar alpha.\n@param[in]\nA         pointer storing matrix A on the GPU\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      specifies the scalar beta\n@param[out]\ny         pointer storing vector y on the GPU.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsymv performs the matrix-vector operation:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and\nA should contain an upper or lower triangular n by n symmetric matrix.\n\nsymv has an implementation which uses atomic operations. See Atomic Operations\nin the API Reference Guide for more information.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo     [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced.\n- if rocblas_fill_lower, the upper part of A is not referenced.\n@param[in]\nn         [rocblas_int]\n@param[in]\nalpha\nspecifies the scalar alpha.\n@param[in]\nA         pointer storing matrix A on the GPU\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      specifies the scalar beta\n@param[out]\ny         pointer storing vector y on the GPU.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
     pub fn rocblas_ssymv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5446,7 +7704,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nger,geru,gerc performs the matrix-vector operations:\n\nA := A + alpha*x*y**T , OR\nA := A + alpha*x*y**H for gerc\nwhere alpha is a scalar, x and y are vectors, and A is an\nm by n matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nthe number of rows of the matrix A.\n@param[in]\nn         [rocblas_int]\nthe number of columns of the matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nger,geru,gerc performs the matrix-vector operations:\n\nA := A + alpha*x*y**T , OR\nA := A + alpha*x*y**H for gerc\nwhere alpha is a scalar, x and y are vectors, and A is an\nm by n matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nthe number of rows of the matrix A.\n@param[in]\nn         [rocblas_int]\nthe number of columns of the matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in, out]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n"]
     pub fn rocblas_sger(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -5537,7 +7795,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nger_batched,geru_batched,gerc_batched perform a batch of the matrix-vector operations:\n\nA := A + alpha*x*y**T , OR\nA := A + alpha*x*y**H for gerc\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha is a scalar, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nthe number of rows of each matrix A_i.\n@param[in]\nn         [rocblas_int]\nthe number of columns of each matrix A_i.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[inout]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nger_batched,geru_batched,gerc_batched perform a batch of the matrix-vector operations:\n\nA := A + alpha*x*y**T , OR\nA := A + alpha*x*y**H for gerc\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha is a scalar, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nthe number of rows of each matrix A_i.\n@param[in]\nn         [rocblas_int]\nthe number of columns of each matrix A_i.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in, out]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_sger_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -5634,7 +7892,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nger_strided_batched,geru_strided_batched,gerc_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**T, OR\nA_i := A_i + alpha*x_i*y_i**H  for gerc\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha is a scalar, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nthe number of rows of each matrix A_i.\n@param[in]\nn         [rocblas_int]\nthe number of columns of each matrix A_i.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increments for the elements of each vector x_i.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= m * incx.\n@param[inout]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nstridey   [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_y. However, ensure that stride_y is of appropriate size. For a typical\ncase this means stride_y >= n * incy.\n@param[inout]\nA         device pointer to the first matrix (A_1) in the batch.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstrideA     [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1)\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nger_strided_batched,geru_strided_batched,gerc_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**T, OR\nA_i := A_i + alpha*x_i*y_i**H  for gerc\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha is a scalar, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nthe number of rows of each matrix A_i.\n@param[in]\nn         [rocblas_int]\nthe number of columns of each matrix A_i.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increments for the elements of each vector x_i.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= m * incx.\n@param[in]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nstridey   [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_y. However, ensure that stride_y is of appropriate size. For a typical\ncase this means stride_y >= n * incy.\n@param[in, out]\nA         device pointer to the first matrix (A_1) in the batch.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstrideA     [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1)\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_sger_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -5749,7 +8007,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr performs the matrix-vector operations:\n\nA := A + alpha*x*x**T\nwhere alpha is a scalar, x is a vector, and A is an\nn by n symmetric matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe symmetric matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr performs the matrix-vector operations:\n\nA := A + alpha*x*x**T\nwhere alpha is a scalar, x is a vector, and A is an\nn by n symmetric matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in, out]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe symmetric matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,0), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0"]
     pub fn rocblas_sspr(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5798,7 +8056,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**T\nwhere alpha is a scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach symmetric matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**T\nwhere alpha is a scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in, out]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach symmetric matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,0), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_sspr_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5851,7 +8109,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**T\nwhere alpha is a scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\neach symmetric matrix A_i. 
Points to the first A_1.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**T\nwhere alpha is a scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in, out]\nAP        device pointer storing the packed version of the specified triangular portion of\neach symmetric matrix A_i. 
Points to the first A_1.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,0), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_sspr_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5912,7 +8170,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr2 performs the matrix-vector operation:\n\nA := A + alpha*x*y**T + alpha*y*x**T\nwhere alpha is a scalar, x and y are vectors, and A is an\nn by n symmetric matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe symmetric matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(n) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr2 performs the matrix-vector operation:\n\nA := A + alpha*x*y**T + alpha*y*x**T\nwhere alpha is a scalar, x and y are vectors, and A is an\nn by n symmetric matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in, out]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe symmetric matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(n) = A(1,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0"]
     pub fn rocblas_sspr2(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5941,7 +8199,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr2_batched performs the matrix-vector operation:\n\nA_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T\nwhere alpha is a scalar, x_i and y_i are vectors, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[inout]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach symmetric matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(n) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr2_batched performs the matrix-vector operation:\n\nA_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T\nwhere alpha is a scalar, x_i and y_i are vectors, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in, out]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach symmetric matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(n) = A(1,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_sspr2_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5972,7 +8230,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr2_strided_batched performs the matrix-vector operation:\n\nA_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T\nwhere alpha is a scalar, x_i and y_i are vectors, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\ny         device pointer pointing to the first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\neach symmetric matrix A_i. 
Points to the first A_1.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(n) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr2_strided_batched performs the matrix-vector operation:\n\nA_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T\nwhere alpha is a scalar, x_i and y_i are vectors, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\ny         device pointer pointing to the first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in, out]\nAP        device pointer storing the packed version of the specified triangular portion of\neach symmetric matrix A_i. 
Points to the first A_1.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(n) = A(1,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
     pub fn rocblas_sspr2_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6009,7 +8267,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr performs the matrix-vector operations:\n\nA := A + alpha*x*x**T\nwhere alpha is a scalar, x is a vector, and A is an\nn by n symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr performs the matrix-vector operations:\n\nA := A + alpha*x*x**T\nwhere alpha is a scalar, x is a vector, and A is an\nn by n symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in, out]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n"]
     pub fn rocblas_ssyr(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6062,7 +8320,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr_batched performs a batch of matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*x[i]**T\nwhere alpha is a scalar, x is an array of vectors, and A is an array of\nn by n symmetric matrices, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr_batched performs a batch of matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*x[i]**T\nwhere alpha is a scalar, x is an array of vectors, and A is an array of\nn by n symmetric matrices, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in, out]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_ssyr_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6119,7 +8377,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr_strided_batched performs the matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*x[i]**T\nwhere alpha is a scalar, vectors, and A is an array of\nn by n symmetric matrices, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nspecifies the pointer increment between vectors (x_i) and (x_i+1).\n@param[inout]\nA         device pointer to the first matrix A_1.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstrideA   [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr_strided_batched performs the matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*x[i]**T\nwhere alpha is a scalar, x is an array of vectors, and A is an array of\nn by n symmetric matrices, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nspecifies the pointer increment between vectors (x_i) and (x_i+1).\n@param[in, out]\nA         device pointer to the first matrix A_1.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstrideA   [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_ssyr_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6184,7 +8442,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr2 performs the matrix-vector operations:\n\nA := A + alpha*x*y**T + alpha*y*x**T\nwhere alpha is a scalar, x and y are vectors, and A is an\nn by n symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr2 performs the matrix-vector operations:\n\nA := A + alpha*x*y**T + alpha*y*x**T\nwhere alpha is a scalar, x and y are vectors, and A is an\nn by n symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in, out]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n"]
     pub fn rocblas_ssyr2(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6245,7 +8503,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr2_batched performs a batch of matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T\nwhere alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a\nn by n symmetric matrix, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[inout]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr2_batched performs a batch of matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T\nwhere alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a\nn by n symmetric matrix, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in, out]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_ssyr2_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6310,7 +8568,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr2_strided_batched the matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T\nwhere alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a\nn by n symmetric matrices, for i = 1 , ... , batch_count\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nspecifies the pointer increment between vectors (x_i) and (x_i+1).\n@param[in]\ny         device pointer to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstridey   [rocblas_stride]\nspecifies the pointer increment between vectors (y_i) and (y_i+1).\n@param[inout]\nA         device pointer to the first matrix A_1.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstrideA   [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr2_strided_batched performs the matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T\nwhere alpha is a scalar, x[i] and y[i] are vectors, and A[i] is an\nn by n symmetric matrix, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nspecifies the pointer increment between vectors (x_i) and (x_i+1).\n@param[in]\ny         device pointer to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstridey   [rocblas_stride]\nspecifies the pointer increment between vectors (y_i) and (y_i+1).\n@param[in, out]\nA         device pointer to the first matrix A_1.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstrideA   [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_ssyr2_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6463,7 +8721,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nhemm_strided_batched performs a batch of the matrix-matrix operations:\n\nC_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,\nC_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,\n\nwhere alpha and beta are scalars, B_i and C_i are m by n matrices, and\nA_i is a Hermitian matrix stored as either upper or lower.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside  [rocblas_side]\n- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i\n- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix\n- rocblas_fill_lower:  A_i is a  lower triangular matrix\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i and C_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i and C_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i and B_i are not referenced.\n\n@param[in]\nA       device pointer to first matrix A_1\n- A_i is m by m if side == rocblas_side_left\n- A_i is n by n if side == rocblas_side_right\nOnly the upper/lower triangular part is accessed.\nThe imaginary component of the diagonal elements is not used.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\notherwise lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       device pointer to first matrix B_1 of dimension (ldb, n) on the GPU\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif side = rocblas_operation_none,  ldb >= max( 1, m ),\notherwise ldb >= max( 1, n ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC        device pointer to first matrix C_1 of dimension (ldc, n) on the GPU.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nhemm_strided_batched performs a batch of the matrix-matrix operations:\n\nC_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,\nC_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,\n\nwhere alpha and beta are scalars, B_i and C_i are m by n matrices, and\nA_i is a Hermitian matrix stored as either upper or lower.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside  [rocblas_side]\n- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i\n- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix\n- rocblas_fill_lower:  A_i is a  lower triangular matrix\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i and C_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i and C_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i and B_i are not referenced.\n\n@param[in]\nA       device pointer to first matrix A_1\n- A_i is m by m if side == rocblas_side_left\n- A_i is n by n if side == rocblas_side_right\nOnly the upper/lower triangular part is accessed.\nThe imaginary component of the diagonal elements is not used.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\notherwise lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       device pointer to first matrix B_1 of dimension (ldb, n) on the GPU\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif side = rocblas_operation_none,  ldb >= max( 1, m ),\notherwise ldb >= max( 1, n ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in, out]\nC        device pointer to first matrix C_1 of dimension (ldc, n) on the GPU.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m ).\n\n@param[in]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_chemm_strided_batched(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -6576,7 +8834,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nherk_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:\n\nC_i := alpha*op( A_i )*op( A_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A) is an n by k matrix, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nop( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none\nop( A_i ) = A_i^H and A_i is k by n if transA == rocblas_operation_conjugate_transpose\n\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op(A) = A^H\n- rocblas_operation_none:                op(A) = A\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen transA is rocblas_operation_none, otherwise of dimension (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif transA = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nherk_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:\n\nC_i := alpha*op( A_i )*op( A_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A) is an n by k matrix, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nop( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none\nop( A_i ) = A_i^H and A_i is k by n if transA == rocblas_operation_conjugate_transpose\n\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op(A) = A^H\n- rocblas_operation_none:                op(A) = A\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen transA is rocblas_operation_none, otherwise of dimension (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif transA = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[in, out]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_cherk_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6691,7 +8949,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nher2k_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-2k update:\n\nC_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^H, op( B_i ) = B_i^H\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nher2k_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-2k update:\n\nC_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^H, op( B_i ) = B_i^H\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[in, out]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_cher2k_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6812,7 +9070,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nherkx_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:\n\nC_i := alpha*op( A_i )*op( B_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nThis routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^H, op( B_i ) = B_i^H\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1)\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1)\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nherkx_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:\n\nC_i := alpha*op( A_i )*op( B_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nThis routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^H, op( B_i ) = B_i^H\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1)\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1)\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[in, out]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_cherkx_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7007,7 +9265,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsymm_strided_batched performs a batch of the matrix-matrix operations:\n\nC_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,\nC_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,\n\nwhere alpha and beta are scalars, B_i and C_i are m by n matrices, and\nA_i is a symmetric matrix stored as either upper or lower.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside  [rocblas_side]\n- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i\n- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix\n- rocblas_fill_lower:  A_i is a  lower triangular matrix\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i and C_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i and C_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A_i and B_i are not referenced.\n\n@param[in]\nA       device pointer to first matrix A_1\n- A_i is m by m if side == rocblas_side_left\n- A_i is n by n if side == rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\notherwise lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       device pointer to first matrix B_1 of dimension (ldb, n) on the GPU.\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC        device pointer to first matrix C_1 of dimension (ldc, n) on the GPU.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsymm_strided_batched performs a batch of the matrix-matrix operations:\n\nC_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,\nC_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,\n\nwhere alpha and beta are scalars, B_i and C_i are m by n matrices, and\nA_i is a symmetric matrix stored as either upper or lower.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside  [rocblas_side]\n- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i\n- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix\n- rocblas_fill_lower:  A_i is a  lower triangular matrix\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i and C_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i and C_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A_i and B_i are not referenced.\n\n@param[in]\nA       device pointer to first matrix A_1\n- A_i is m by m if side == rocblas_side_left\n- A_i is n by n if side == rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\notherwise lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       device pointer to first matrix B_1 of dimension (ldb, n) on the GPU.\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC        device pointer to first matrix C_1 of dimension (ldc, n) on the GPU.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m ).\n\n@param[in, out]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_ssymm_strided_batched(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -7230,7 +9488,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyrk_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:\n\nC_i := alpha*op( A_i )*op( A_i )^T + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) is an n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nop( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none\nop( A_i ) = A_i^T and A_i is k by n if transA == rocblas_operation_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_transpose:           op(A) = A^T\n- rocblas_operation_none:                op(A) = A\n- rocblas_operation_conjugate_transpose: op(A) = A^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types. See cherk\nand zherk.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen transA is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif transA = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU. on the GPU.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1)\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyrk_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:\n\nC_i := alpha*op( A_i )*op( A_i )^T + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) is an n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nop( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none\nop( A_i ) = A_i^T and A_i is k by n if transA == rocblas_operation_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_transpose:           op(A) = A^T\n- rocblas_operation_none:                op(A) = A\n- rocblas_operation_conjugate_transpose: op(A) = A^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types. See cherk\nand zherk.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen transA is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif transA = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU. on the GPU.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[in, out]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1)\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_ssyrk_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7457,7 +9715,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyr2k_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-2k update:\n\nC_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == rocblas_operation_transpose\nor for ssyr2k_strided_batched and dsyr2k_strided_batched when trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types in csyr2k_strided_batched and zsyr2k_strided_batched.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1)\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n)\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1)\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyr2k_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-2k update:\n\nC_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == rocblas_operation_transpose\nor for ssyr2k_strided_batched and dsyr2k_strided_batched when trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types in csyr2k_strided_batched and zsyr2k_strided_batched.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1)\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n)\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1)\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[in, out]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_ssyr2k_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7696,7 +9954,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyrkx_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:\n\nC_i := alpha*op( A_i )*op( B_i )^T + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nThis routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^T, op( B_i ) = B_i^T,  and A_i and B_i are k by n if trans == rocblas_operation_transpose\nor for ssyrkx_strided_batched and dsyrkx_strided_batched when trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types in csyrkx_strided_batched and zsyrkx_strided_batched.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyrkx_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:\n\nC_i := alpha*op( A_i )*op( B_i )^T + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nThis routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^T, op( B_i ) = B_i^T,  and A_i and B_i are k by n if trans == rocblas_operation_transpose\nor for ssyrkx_strided_batched and dsyrkx_strided_batched when trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types in csyrkx_strided_batched and zsyrkx_strided_batched.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in, out]\nC       Device pointer to the first matrix C_1 on the GPU.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[in]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
     pub fn rocblas_ssyrkx_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7785,231 +10043,8 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm performs one of the matrix-matrix operations:\n\nB := alpha*op( A )*B,   or\nB := alpha*B*op( A ),\n\nwhere  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A )  is one  of\n\nop( A ) = A   or\nop( A ) = A^T   or\nop( A ) = A^H.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix and the strictly lower triangular part of\nA is not referenced. Here k is m when side == rocblas_side_left\nand is n when side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced. Here k is m when  side == rocblas_side_left\nand is n when side == rocblas_side_right.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A) multiplies B from the left or right as follows:\n- rocblas_side_left:       B := alpha*op( A )*B\n- rocblas_side_right:      B := alpha*B*op( A )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A) = A\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A is unit triangular as follows:\n- 
rocblas_diagonal_unit:      A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and B need not be set before\nentry.\n\n@param[in]\nA       Device pointer to matrix A on the GPU.\nA has dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[inout]\nB       Device pointer to the first matrix B_0 on the GPU.\nOn entry,  the leading  m by n part of the array  B must\ncontain the matrix  B,  and  on exit  is overwritten  by the\ntransformed matrix.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm performs one of the matrix-matrix operations:\n\nC := alpha*op( A )*B,   or\nC := alpha*B*op( A ),\n\nThe Legacy BLAS in-place trmm functionality,\n\nB := alpha*op( A )*B,   or\nB := alpha*B*op( A ),\n\nis available by setting pointer C equal to pointer B, and ldc equal to ldb.\n\nalpha  is a scalar,  B  is an m by n matrix, C  is an m by n matrix,  A  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A )  is one  of\n\nop( A ) = A     or\nop( A ) = A^T   or\nop( A ) = A^H.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix and the strictly lower triangular part of\nA is not referenced. Here k is m when side == rocblas_side_left\nand is n when side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced. 
Here k is m when  side == rocblas_side_left\nand is n when side == rocblas_side_right.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A) multiplies B from the left or right as follows:\n- rocblas_side_left:       C := alpha*op( A )*B\n- rocblas_side_right:      C := alpha*B*op( A )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A) = A\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A is unit triangular as follows:\n- rocblas_diagonal_unit:      A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and B need not be set before\nentry.\n\n@param[in]\nA       Device pointer to matrix A on the GPU.\nA has dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nB       Device pointer to the matrix B on the GPU.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n\n@param[out]\nC      Device pointer to the matrix C on the GPU.\n\n@param[in]\nldc   [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m).\nIf B and C are pointers to the same matrix then ldc must equal ldb or\nrocblas_status_invalid_value will be returned.\n"]
     pub fn rocblas_strmm(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const f32,
-        A: *const f32,
-        lda: rocblas_int,
-        B: *mut f32,
-        ldb: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocblas_dtrmm(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const f64,
-        A: *const f64,
-        lda: rocblas_int,
-        B: *mut f64,
-        ldb: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocblas_ctrmm(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const rocblas_float_complex,
-        A: *const rocblas_float_complex,
-        lda: rocblas_int,
-        B: *mut rocblas_float_complex,
-        ldb: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocblas_ztrmm(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const rocblas_double_complex,
-        A: *const rocblas_double_complex,
-        lda: rocblas_int,
-        B: *mut rocblas_double_complex,
-        ldb: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_batched performs one of the batched matrix-matrix operations:\n\nB_i := alpha*op( A_i )*B_i,   or\nB_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nwhere  alpha  is a scalar,  B_i  is an m by n matrix,  A_i  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of\n\nop( A_i ) = A_i   or   op( A_i ) = A_i^T   or   op( A_i ) = A_i^H.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A_i) multiplies B_i from the left or right as follows:\n- rocblas_side_left:       B_i := alpha*op( A_i )*B_i\n- rocblas_side_right:      B_i := alpha*B_i*op( A_i )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A_i) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A_i) = A_i\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A_i is unit triangular as follows:\n- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i is not referenced and B_i need not be set before\nentry.\n\n@param[in]\nA       Device array of device pointers storing each matrix A_i on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA_i  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[inout]\nB       device array of device pointers storing each matrix B_i on the GPU.\nOn entry,  the leading  m by n part of the array  B_i must\ncontain the matrix  B_i,  and  on exit  is overwritten  by the\ntransformed matrix.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch."]
-    pub fn rocblas_strmm_batched(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const f32,
-        A: *const *const f32,
-        lda: rocblas_int,
-        B: *const *mut f32,
-        ldb: rocblas_int,
-        batch_count: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocblas_dtrmm_batched(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const f64,
-        A: *const *const f64,
-        lda: rocblas_int,
-        B: *const *mut f64,
-        ldb: rocblas_int,
-        batch_count: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocblas_ctrmm_batched(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const rocblas_float_complex,
-        A: *const *const rocblas_float_complex,
-        lda: rocblas_int,
-        B: *const *mut rocblas_float_complex,
-        ldb: rocblas_int,
-        batch_count: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocblas_ztrmm_batched(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const rocblas_double_complex,
-        A: *const *const rocblas_double_complex,
-        lda: rocblas_int,
-        B: *const *mut rocblas_double_complex,
-        ldb: rocblas_int,
-        batch_count: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_strided_batched performs one of the strided_batched matrix-matrix operations:\n\nB_i := alpha*op( A_i )*B_i,   or\nB_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nwhere  alpha  is a scalar,  B_i  is an m by n matrix,  A_i  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of\n\nop( A_i ) = A_i   or\nop( A_i ) = A_i^T   or\nop( A_i ) = A_i^H.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A_i) multiplies B_i from the left or right as follows:\n- rocblas_side_left:       B_i := alpha*op( A_i )*B_i\n- rocblas_side_right:      B_i := alpha*B_i*op( A_i )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix\n- rocblas_fill_lower:  A is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A_i) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A_i) = A_i\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A_i is unit triangular as follows:\n- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i is not referenced and B_i need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_0 on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA_i  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[inout]\nB       Device pointer to the first matrix B_0 on the GPU.\nOn entry,  the leading  m by n part of the array  B_i must\ncontain the matrix  B_i,  and  on exit  is overwritten  by the\ntransformed matrix.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch."]
-    pub fn rocblas_strmm_strided_batched(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const f32,
-        A: *const f32,
-        lda: rocblas_int,
-        stride_A: rocblas_stride,
-        B: *mut f32,
-        ldb: rocblas_int,
-        stride_B: rocblas_stride,
-        batch_count: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocblas_dtrmm_strided_batched(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const f64,
-        A: *const f64,
-        lda: rocblas_int,
-        stride_A: rocblas_stride,
-        B: *mut f64,
-        ldb: rocblas_int,
-        stride_B: rocblas_stride,
-        batch_count: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocblas_ctrmm_strided_batched(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const rocblas_float_complex,
-        A: *const rocblas_float_complex,
-        lda: rocblas_int,
-        stride_A: rocblas_stride,
-        B: *mut rocblas_float_complex,
-        ldb: rocblas_int,
-        stride_B: rocblas_stride,
-        batch_count: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocblas_ztrmm_strided_batched(
-        handle: rocblas_handle,
-        side: rocblas_side,
-        uplo: rocblas_fill,
-        transA: rocblas_operation,
-        diag: rocblas_diagonal,
-        m: rocblas_int,
-        n: rocblas_int,
-        alpha: *const rocblas_double_complex,
-        A: *const rocblas_double_complex,
-        lda: rocblas_int,
-        stride_A: rocblas_stride,
-        B: *mut rocblas_double_complex,
-        ldb: rocblas_int,
-        stride_B: rocblas_stride,
-        batch_count: rocblas_int,
-    ) -> rocblas_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_outofplace performs one of the matrix-matrix operations:\n\nC := alpha*op( A )*B,   or\nC := alpha*B*op( A ),\n\nwhere  alpha  is a scalar,  B and C are m by n matrices,  A  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A )  is one  of\n\nop( A ) = A   or\nop( A ) = A^T   or\nop( A ) = A^H.\n\nNote that trmm_outofplace can provide in-place functionality in the same way as trmm\nby passing in the same address for both matrices B and C.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A) multiplies B from the left or right as follows:\n- rocblas_side_left:       C := alpha*op( A )*B\n- rocblas_side_right:      C := alpha*B*op( A )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A) = A\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A is unit triangular as follows:\n- rocblas_diagonal_unit:      A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and B need not be set before\nentry.\n\n@param[in]\nA       Device pointer to matrix A on the GPU.\nA has dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nB       Device pointer to the matrix B on the GPU.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n\n@param[out]\nC      Device pointer to the matrix C on the GPU.\n\n@param[in]\nldc   [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m).\nIf B and C pointers are to the same matrix then ldc must equal ldb or\nrocblas_status_invalid_size will be returned.\n"]
-    pub fn rocblas_strmm_outofplace(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8028,7 +10063,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    pub fn rocblas_dtrmm_outofplace(
+    pub fn rocblas_dtrmm(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8047,7 +10082,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    pub fn rocblas_ctrmm_outofplace(
+    pub fn rocblas_ctrmm(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8066,7 +10101,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    pub fn rocblas_ztrmm_outofplace(
+    pub fn rocblas_ztrmm(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8085,8 +10120,8 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_outofplace_batched performs one of the batched matrix-matrix operations:\n\nC_i := alpha*op( A_i )*B_i,   or\nC_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nwhere  alpha  is a scalar,  B_i  is an m by n matrix,  A_i  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of\n\nop( A_i ) = A_i   or\nop( A_i ) = A_i^T   or\nop( A_i ) = A_i^H.\n\nNote that trmm_outofplace_batched can provide in-place functionality in the same way as trmm_batched\nby passing in the same address for both matrices B and C.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A_i) multiplies B_i from the left or right as follows:\n- rocblas_side_left:       C_i := alpha*op( A_i )*B_i\n- rocblas_side_right:      C_i := alpha*B_i*op( A_i )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A_i) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A_i) = A_i\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A_i is unit triangular as follows:\n- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i is not referenced and B_i need not be set before\nentry.\n\n@param[in]\nA       Device array of device pointers storing each matrix A_i on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA_i  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nB       device array of device pointers storing each matrix B_i on the GPU.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[out]\nC      device array of device pointers storing each matrix C_i on the GPU.\n\n@param[in]\nldc   [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m).\nIf B and C pointers are to the same matrix then ldc must equal ldb or\nrocblas_status_invalid_size will be returned.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch."]
-    pub fn rocblas_strmm_outofplace_batched(
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_batched performs one of the matrix-matrix operations:\n\nC_i := alpha*op( A_i )*B_i,   or\nC_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nThe Legacy BLAS in-place trmm_batched functionality,\n\nB_i := alpha*op( A_i )*B_i,   or\nB_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nis available by setting pointer C equal to pointer B and ldc equal to ldb.\n\nalpha  is a scalar,  B_i  is an m by n matrix, C_i  is an m by n matrix,  A_i  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of\n\nop( A_i ) = A_i     or\nop( A_i ) = A_i^T   or\nop( A_i ) = A_i^H.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix and the strictly lower triangular part of\nA is not referenced. Here k is m when side == rocblas_side_left\nand is n when side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced. 
Here k is m when  side == rocblas_side_left\nand is n when side == rocblas_side_right.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A_i) multiplies B_i from the left or right as follows:\n- rocblas_side_left:       C_i := alpha*op( A_i )*B_i\n- rocblas_side_right:      C_i := alpha*B_i*op( A_i )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A_i) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A_i) = A_i\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A_i is unit triangular as follows:\n- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i is not referenced and B_i need not be set before\nentry.\n\n@param[in]\nA       Device array of device pointers storing each matrix A_i on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA_i  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nB       device array of device pointers storing each matrix B_i on the GPU.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[out]\nC      device array of device pointers storing each matrix C_i on the GPU.\n\n@param[in]\nldc   [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m).\nIf B and C are pointers to the same array of pointers then ldc must\nequal ldb or rocblas_status_invalid_value will be returned.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch."]
+    pub fn rocblas_strmm_batched(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8106,7 +10141,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    pub fn rocblas_dtrmm_outofplace_batched(
+    pub fn rocblas_dtrmm_batched(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8126,7 +10161,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    pub fn rocblas_ctrmm_outofplace_batched(
+    pub fn rocblas_ctrmm_batched(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8146,7 +10181,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    pub fn rocblas_ztrmm_outofplace_batched(
+    pub fn rocblas_ztrmm_batched(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8166,8 +10201,8 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_outofplace_strided_batched performs one of the strided_batched matrix-matrix operations:\n\nC_i := alpha*op( A_i )*B_i,   or\nC_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nwhere  alpha  is a scalar,  B_i  is an m by n matrix,  A_i  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of\n\nop( A_i ) = A_i   or\nop( A_i ) = A_i^T   or\nop( A_i ) = A_i^H.\n\nNote that trmm_outofplace_strided_batched can provide in-place functionality in the same way as trmm_strided_batched\nby passing in the same address for both matrices B and C.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A_i) multiplies B_i from the left or right as follows:\n- rocblas_side_left:       C_i := alpha*op( A_i )*B_i\n- rocblas_side_right:      C_i := alpha*B_i*op( A_i )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A_i) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A_i) = A_i\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A_i is unit triangular as follows:\n- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i is not referenced and B_i need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_0 on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA_i  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       Device pointer to the first matrix B_0 on the GPU.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[out]\nC      Device pointer to the first matrix C_0 on the GPU.\n\n@param[in]\nldc   [rocblas_int]\nldc specifies the first dimension of C_i. ldc >= max( 1, m).\nIf B and C pointers are to the same matrix then ldc must equal ldb or\nrocblas_status_invalid_size will be returned.\n\n@param[in]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch."]
-    pub fn rocblas_strmm_outofplace_strided_batched(
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_strided_batched performs one of the matrix-matrix operations:\n\nC_i := alpha*op( A_i )*B_i,   or\nC_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nThe Legacy BLAS in-place trmm_strided_batched functionality,\n\nB_i := alpha*op( A_i )*B_i,   or\nB_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nis available by setting pointer C equal to pointer B, ldc equal to ldb, and stride_C equal to stride_B.\n\nalpha  is a scalar,  B_i  is an m by n matrix, C_i  is an m by n matrix,  A_i  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of\n\nop( A_i ) = A_i   or\nop( A_i ) = A_i^T   or\nop( A_i ) = A_i^H.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix and the strictly lower triangular part of\nA is not referenced. Here k is m when side == rocblas_side_left\nand is n when side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced. 
Here k is m when  side == rocblas_side_left\nand is n when side == rocblas_side_right.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A_i) multiplies B_i from the left or right as follows:\n- rocblas_side_left:       C_i := alpha*op( A_i )*B_i\n- rocblas_side_right:      C_i := alpha*B_i*op( A_i )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A_i) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A_i) = A_i\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A_i is unit triangular as follows:\n- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i is not referenced and B_i need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_0 on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA_i  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       Device pointer to the first matrix B_0 on the GPU.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[out]\nC      Device pointer to the first matrix C_0 on the GPU.\n\n@param[in]\nldc   [rocblas_int]\nldc specifies the first dimension of C_i. ldc >= max( 1, m).\nIf B and C pointers are to the same matrix then ldc must equal ldb or\nrocblas_status_invalid_size will be returned.\n\n@param[in]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\nIf B == C and ldb == ldc then stride_C should equal stride_B or\nbehavior is undefined.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch."]
+    pub fn rocblas_strmm_strided_batched(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8190,7 +10225,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    pub fn rocblas_dtrmm_outofplace_strided_batched(
+    pub fn rocblas_dtrmm_strided_batched(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8213,7 +10248,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    pub fn rocblas_ctrmm_outofplace_strided_batched(
+    pub fn rocblas_ctrmm_strided_batched(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8236,7 +10271,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    pub fn rocblas_ztrmm_outofplace_strided_batched(
+    pub fn rocblas_ztrmm_strided_batched(
         handle: rocblas_handle,
         side: rocblas_side,
         uplo: rocblas_fill,
@@ -8259,7 +10294,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrtri  compute the inverse of a matrix A, namely, invA\nand write the result into invA;\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n\nif rocblas_fill_upper, the lower part of A is not referenced\nif rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\ndiag      [rocblas_diagonal]\n- 'rocblas_diagonal_non_unit', A is non-unit triangular;\n- 'rocblas_diagonal_unit', A is unit triangular;\n@param[in]\nn         [rocblas_int]\nsize of matrix A and invA.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[out]\ninvA      device pointer storing matrix invA.\n@param[in]\nldinvA    [rocblas_int]\nspecifies the leading dimension of invA."]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrtri  computes the inverse of a matrix A, namely, invA\nand writes the result into invA;\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n\nif rocblas_fill_upper, the lower part of A is not referenced\nif rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\ndiag      [rocblas_diagonal]\n- 'rocblas_diagonal_non_unit', A is non-unit triangular;\n- 'rocblas_diagonal_unit', A is unit triangular;\n@param[in]\nn         [rocblas_int]\nsize of matrix A and invA.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[out]\ninvA      device pointer storing matrix invA.\nPartial inplace operation is supported. See below:\n- If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store\nthe inverse of the upper triangular matrix, and the strictly lower\ntriangular part of invA may be cleared.\n- If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store\nthe inverse of the lower triangular matrix, and the strictly upper\ntriangular part of invA may be cleared.\n@param[in]\nldinvA    [rocblas_int]\nspecifies the leading dimension of invA."]
     pub fn rocblas_strtri(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -8312,7 +10347,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrtri_batched  compute the inverse of A_i and write into invA_i where\nA_i and invA_i are the i-th matrices in the batch,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n@param[in]\ndiag      [rocblas_diagonal]\n- 'rocblas_diagonal_non_unit', A is non-unit triangular;\n- 'rocblas_diagonal_unit', A is unit triangular;\n@param[in]\nn         [rocblas_int]\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[out]\ninvA      device array of device pointers storing the inverse of each matrix A_i.\nPartial inplace operation is supported. See below:\n-If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store\nthe inverse of the upper triangular matrix, and the strictly lower\ntriangular part of invA is cleared.\n- If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store\nthe inverse of the lower triangular matrix, and the strictly upper\ntriangular part of invA is cleared.\n@param[in]\nldinvA    [rocblas_int]\nspecifies the leading dimension of each invA_i.\n@param[in]\nbatch_count [rocblas_int]\nnumbers of matrices in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrtri_batched  computes the inverse of A_i and writes into invA_i where\nA_i and invA_i are the i-th matrices in the batch,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n@param[in]\ndiag      [rocblas_diagonal]\n- 'rocblas_diagonal_non_unit', A is non-unit triangular;\n- 'rocblas_diagonal_unit', A is unit triangular;\n@param[in]\nn         [rocblas_int]\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[out]\ninvA      device array of device pointers storing the inverse of each matrix A_i.\nPartial inplace operation is supported. See below:\n- If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store\nthe inverse of the upper triangular matrix, and the strictly lower\ntriangular part of invA may be cleared.\n- If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store\nthe inverse of the lower triangular matrix, and the strictly upper\ntriangular part of invA may be cleared.\n@param[in]\nldinvA    [rocblas_int]\nspecifies the leading dimension of each invA_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of matrices in the batch."]
     pub fn rocblas_strtri_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -8369,7 +10404,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrtri_strided_batched compute the inverse of A_i and write into invA_i where\nA_i and invA_i are the i-th matrices in the batch,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n@param[in]\ndiag      [rocblas_diagonal]\n- 'rocblas_diagonal_non_unit', A is non-unit triangular;\n- 'rocblas_diagonal_unit', A is unit triangular;\n@param[in]\nn         [rocblas_int]\n@param[in]\nA         device pointer pointing to address of first matrix A_1.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A.\n@param[in]\nstride_a  [rocblas_stride]\n\"batch stride a\": stride from the start of one A_i matrix to the next A_(i + 1).\n@param[out]\ninvA      device pointer storing the inverses of each matrix A_i.\nPartial inplace operation is supported. See below:\n\n- If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store\nthe inverse of the upper triangular matrix, and the strictly lower\ntriangular part of invA is cleared.\n\n- If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store\nthe inverse of the lower triangular matrix, and the strictly upper\ntriangular part of invA is cleared.\n@param[in]\nldinvA    [rocblas_int]\nspecifies the leading dimension of each invA_i.\n@param[in]\nstride_invA  [rocblas_stride]\n\"batch stride invA\": stride from the start of one invA_i matrix to the next invA_(i + 1).\n@param[in]\nbatch_count  [rocblas_int]\nnumbers of matrices in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrtri_strided_batched computes the inverse of A_i and writes into invA_i where\nA_i and invA_i are the i-th matrices in the batch,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n@param[in]\ndiag      [rocblas_diagonal]\n- 'rocblas_diagonal_non_unit', A is non-unit triangular;\n- 'rocblas_diagonal_unit', A is unit triangular;\n@param[in]\nn         [rocblas_int]\n@param[in]\nA         device pointer pointing to address of first matrix A_1.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A.\n@param[in]\nstride_a  [rocblas_stride]\n\"batch stride a\": stride from the start of one A_i matrix to the next A_(i + 1).\n@param[out]\ninvA      device pointer storing the inverses of each matrix A_i.\nPartial inplace operation is supported. See below:\n\n- If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store\nthe inverse of the upper triangular matrix, and the strictly lower\ntriangular part of invA may be cleared.\n\n- If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store\nthe inverse of the lower triangular matrix, and the strictly upper\ntriangular part of invA may be cleared.\n@param[in]\nldinvA    [rocblas_int]\nspecifies the leading dimension of each invA_i.\n@param[in]\nstride_invA  [rocblas_stride]\n\"batch stride invA\": stride from the start of one invA_i matrix to the next invA_(i + 1).\n@param[in]\nbatch_count  [rocblas_int]\nnumber of matrices in the batch."]
     pub fn rocblas_strtri_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -8434,7 +10469,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrsm solves:\n\nop(A)*X = alpha*B or  X*op(A) = alpha*B,\n\nwhere alpha is a scalar, X and B are m by n matrices,\n\nA is triangular matrix and op(A) is one of\n\nop( A ) = A   or   op( A ) = A^T   or   op( A ) = A^H.\n\nThe matrix X is overwritten on B.\n\nNote about memory allocation:\nWhen trsm is launched with a k evenly divisible by the internal block size of 128,\nand is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated\nmemory found in the handle to increase overall performance. This memory can be managed by using\nthe environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory\nused for temporary storage will default to 1 MB and may result in chunking, which in turn may\nreduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set\nto the desired chunk of right hand sides to be used at a time\n(where k is m when rocblas_side_left and is n when rocblas_side_right).\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\n- rocblas_side_left:       op(A)*X = alpha*B\n- rocblas_side_right:      X*op(A) = alpha*B\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- transB:    op(A) = A.\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. 
n >= 0.\n\n@param[in]\nalpha\ndevice pointer or host pointer specifying the scalar alpha. When alpha is\n&zero then A is not referenced and B need not be set before\nentry.\n\n@param[in]\nA       device pointer storing matrix A.\nof dimension ( lda, k ), where k is m\nwhen  rocblas_side_left  and\nis  n  when  rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\nif side = rocblas_side_right, lda >= max( 1, n ).\n\n@param[in,out]\nB       device pointer storing matrix B.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrsm solves:\n\nop(A)*X = alpha*B or  X*op(A) = alpha*B,\n\nwhere alpha is a scalar, X and B are m by n matrices,\n\nA is triangular matrix and op(A) is one of\n\nop( A ) = A   or   op( A ) = A^T   or   op( A ) = A^H.\n\nThe matrix X is overwritten on B.\n\nNote about memory allocation:\nWhen trsm is launched with a k evenly divisible by the internal block size of 128,\nand is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated\nmemory found in the handle to increase overall performance. This memory can be managed by using\nthe environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory\nused for temporary storage will default to 1 MB and may result in chunking, which in turn may\nreduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set\nto the desired chunk of right hand sides to be used at a time\n(where k is m when rocblas_side_left and is n when rocblas_side_right).\n\nAlthough not widespread, some gemm kernels used by trsm may use atomic operations.\nSee Atomic Operations in the API Reference Guide for more information.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\n- rocblas_side_left:       op(A)*X = alpha*B\n- rocblas_side_right:      X*op(A) = alpha*B\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- transB:    op(A) = A.\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of 
rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. n >= 0.\n\n@param[in]\nalpha\ndevice pointer or host pointer specifying the scalar alpha. When alpha is\n&zero then A is not referenced and B need not be set before\nentry.\n\n@param[in]\nA       device pointer storing matrix A.\nof dimension ( lda, k ), where k is m\nwhen  rocblas_side_left  and\nis  n  when  rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\nif side = rocblas_side_right, lda >= max( 1, n ).\n\n@param[in,out]\nB       device pointer storing matrix B.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n"]
     pub fn rocblas_strsm(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -8657,7 +10692,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngemm performs one of the matrix-matrix operations:\n\nC = alpha*op( A )*op( B ) + beta*C,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B and C are matrices, with\nop( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nnumber or rows of matrices op( A ) and C.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrices op( B ) and C.\n@param[in]\nk         [rocblas_int]\nnumber of columns of matrix op( A ) and number of rows of matrix op( B ).\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nB         device pointer storing matrix B.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B.\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in, out]\nC         device pointer storing matrix C on the GPU.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n"]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngemm performs one of the matrix-matrix operations:\n\nC = alpha*op( A )*op( B ) + beta*C,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B and C are matrices, with\nop( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.\n\nAlthough not widespread, some gemm kernels may use atomic operations. See Atomic Operations\nin the API Reference Guide for more information.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nnumber or rows of matrices op( A ) and C.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrices op( B ) and C.\n@param[in]\nk         [rocblas_int]\nnumber of columns of matrix op( A ) and number of rows of matrix op( B ).\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nB         device pointer storing matrix B.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B.\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in, out]\nC         device pointer storing matrix C on the GPU.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n"]
     pub fn rocblas_sgemm(
         handle: rocblas_handle,
         transA: rocblas_operation,
@@ -8753,7 +10788,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngemm_batched performs one of the batched matrix-matrix operations:\n\nC_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batch_count,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B and C are strided batched matrices, with\n\nop( A ) an m by k by batch_count strided_batched matrix,\nop( B ) an k by n by batch_count strided_batched matrix and\nC an m by n by batch_count strided_batched matrix.\n\n@param[in]\nhandle    [rocblas_handle\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimention m.\n@param[in]\nn         [rocblas_int]\nmatrix dimention n.\n@param[in]\nk         [rocblas_int]\nmatrix dimention k.\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nB         device array of device pointers storing each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i.\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in, out]\nC         device array of device pointers storing each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i.\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch."]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngemm_batched performs one of the batched matrix-matrix operations:\n\nC_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batch_count,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B and C are strided batched matrices, with\n\nop( A ) an m by k by batch_count matrices,\nop( B ) an k by n by batch_count matrices and\nC an m by n by batch_count matrices.\n\n@param[in]\nhandle    [rocblas_handle\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimention m.\n@param[in]\nn         [rocblas_int]\nmatrix dimention n.\n@param[in]\nk         [rocblas_int]\nmatrix dimention k.\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nB         device array of device pointers storing each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i.\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in, out]\nC         device array of device pointers storing each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i.\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch."]
     pub fn rocblas_sgemm_batched(
         handle: rocblas_handle,
         transA: rocblas_operation,
@@ -9481,7 +11516,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_ex performs one of the matrix-matrix operations:\n\nD = alpha*op( A )*op( B ) + beta*C,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B, C, and D are matrices, with\nop( A ) an m by k matrix, op( B ) a k by n matrix and C and D are m by n matrices.\nC and D may point to the same matrix if their parameters are identical.\n\nSupported types are as follows:\n- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_f16_r = a_type = b_type; rocblas_datatype_f32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type; rocblas_datatype_f32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type\n\nTwo int8 datatypes are supported: int8_t and rocblas_int8x4. int8_t is the C99 signed\n8 bit integer. The default is int8_t and it is recommended int8_t be used. rocblas_int8x4\nis a packed datatype. 
The packed int 8 datatype occurs if the user sets:\n\n@code\nflags |= rocblas_gemm_flags_pack_int8x4;\n@endcode\n\nFor this packed int8 datatype matrices A and B are packed into int8x4 in the k dimension.\nThis will impose the following size restrictions on A or B:\n\n- k must be a multiple of 4\n- if transA == rocblas_operation_transpose then lda must be a multiple of 4\n- if transB == rocblas_operation_none then ldb must be a multiple of 4\n- if transA == rocblas_operation_none the matrix A must have each 4 consecutive\nvalues in the k dimension packed\n- if transB == rocblas_operation_transpose the matrix B must have each 4\nconsecutive values in the k dimension packed.\n\nThis packing can be achieved with the following pseudo-code. The code assumes the\noriginal matrices are in A and B, and the packed matrices are A_packed and B_packed.\nThe size of the A_packed and B_packed are the same as the size of the A and B respectively.\n\n@code\nif(transA == rocblas_operation_none)\n{\nint nb = 4;\nfor(int i_m = 0; i_m < m; i_m++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nA_packed[i_k % nb + (i_m + (i_k / nb) * lda) * nb] = A[i_m + i_k * lda];\n}\n}\n}\nelse\n{\nA_packed = A;\n}\nif(transB == rocblas_operation_transpose)\n{\nint nb = 4;\nfor(int i_n = 0; i_n < m; i_n++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nB_packed[i_k % nb + (i_n + (i_k / nb) * ldb) * nb] = B[i_n + i_k * ldb];\n}\n}\n}\nelse\n{\nB_packed = B;\n}\n@endcode\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the scalar alpha. 
Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer storing matrix A.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nb         [void *]\ndevice pointer storing matrix B.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of matrix B.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B.\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice pointer storing matrix C.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of matrix C.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n@param[out]\nd         [void *]\ndevice pointer storing matrix D.\nIf d and c pointers are to the same matrix then d_type must equal c_type and ldd must equal ldc\nor the respective invalid status will be returned.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of matrix D.\n@param[in]\nldd       [rocblas_int]\nspecifies the leading dimension of D.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      [rocblas_gemm_algo]\nenumerant specifying the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nreserved for future use.\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_ex performs one of the matrix-matrix operations:\n\nD = alpha*op( A )*op( B ) + beta*C,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B, C, and D are matrices, with\nop( A ) an m by k matrix, op( B ) a k by n matrix and C and D are m by n matrices.\nC and D may point to the same matrix if their parameters are identical.\n\nSupported types are as follows:\n- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_f16_r = a_type = b_type; rocblas_datatype_f32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type; rocblas_datatype_f32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type\n\nAlthough not widespread, some gemm kernels used by gemm_ex may use atomic operations.\nSee Atomic Operations in the API Reference Guide for more information.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     
[const void *]\ndevice pointer or host pointer specifying the scalar alpha. Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer storing matrix A.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nb         [void *]\ndevice pointer storing matrix B.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of matrix B.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B.\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice pointer storing matrix C.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of matrix C.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n@param[out]\nd         [void *]\ndevice pointer storing matrix D.\nIf d and c pointers are to the same matrix then d_type must equal c_type and ldd must equal ldc\nor the respective invalid status will be returned.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of matrix D.\n@param[in]\nldd       [rocblas_int]\nspecifies the leading dimension of D.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      [rocblas_gemm_algo]\nenumerant specifying the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nif algo is rocblas_gemm_algo_solution_index, this controls which solution is used.\nWhen algo is not rocblas_gemm_algo_solution_index, or if solution_index <= 0, the default solution is used.\nThis parameter was unused in previous releases and instead always used the default solution\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
     pub fn rocblas_gemm_ex(
         handle: rocblas_handle,
         transA: rocblas_operation,
@@ -9511,7 +11546,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_batched_ex performs one of the batched matrix-matrix operations:\nD_i = alpha*op(A_i)*op(B_i) + beta*C_i, for i = 1, ..., batch_count.\nwhere op( X ) is one of\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\nalpha and beta are scalars, and A, B, C, and D are batched pointers to matrices, with\nop( A ) an m by k by batch_count batched matrix,\nop( B ) a k by n by batch_count batched matrix and\nC and D are m by n by batch_count batched matrices.\nThe batched matrices are an array of pointers to matrices.\nThe number of pointers to matrices is batch_count.\nC and D may point to the same matrices if their parameters are identical.\n\nSupported types are as follows:\n- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type\n\nTwo int8 datatypes are supported: int8_t and rocblas_int8x4. int8_t is the C99 signed\n8 bit integer. The default is int8_t and it is recommended int8_t be used. rocblas_int8x4\nis a packed datatype. 
The packed int 8 datatype occurs if the user sets:\n\n@code\nflags |= rocblas_gemm_flags_pack_int8x4;\n@endcode\n\nFor this packed int8 datatype matrices A and B are packed into int8x4 in the k dimension.\nThis will impose the following size restrictions on A or B:\n\n- k must be a multiple of 4\n- if transA == rocblas_operation_transpose then lda must be a multiple of 4\n- if transB == rocblas_operation_none then ldb must be a multiple of 4\n- if transA == rocblas_operation_none the matrix A must have each 4 consecutive\nvalues in the k dimension packed\n- if transB == rocblas_operation_transpose the matrix B must have each 4\nconsecutive values in the k dimension packed.\n\nThis packing can be achieved with the following pseudo-code. The code assumes the\noriginal matrices are in A and B, and the packed matrices are A_packed and B_packed.\nThe size of the A_packed and B_packed are the same as the size of the A and B respectively.\n\n@code\nif(transA == rocblas_operation_none)\n{\nint nb = 4;\nfor(int i_m = 0; i_m < m; i_m++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nA_packed[i_k % nb + (i_m + (i_k / nb) * lda) * nb] = A[i_m + i_k * lda];\n}\n}\n}\nelse\n{\nA_packed = A;\n}\nif(transB == rocblas_operation_transpose)\n{\nint nb = 4;\nfor(int i_n = 0; i_n < m; i_n++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nB_packed[i_k % nb + (i_n + (i_k / nb) * ldb) * nb] = B[i_n + i_k * ldb];\n}\n}\n}\nelse\n{\nB_packed = B;\n}\n@endcode\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the scalar alpha. 
Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer storing array of pointers to each matrix A_i.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nb         [void *]\ndevice pointer storing array of pointers to each matrix B_i.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i.\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice array of device pointers to each matrix C_i.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i.\n@param[out]\nd         [void *]\ndevice array of device pointers to each matrix D_i.\nIf d and c are the same array of matrix pointers then d_type must equal c_type and ldd must equal ldc\nor the respective invalid status will be returned.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of each matrix D_i.\n@param[in]\nldd       [rocblas_int]\nspecifies the leading dimension of each D_i.\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      [rocblas_gemm_algo]\nenumerant specifying the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nreserved for future use.\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_batched_ex performs one of the batched matrix-matrix operations:\nD_i = alpha*op(A_i)*op(B_i) + beta*C_i, for i = 1, ..., batch_count.\nwhere op( X ) is one of\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\nalpha and beta are scalars, and A, B, C, and D are batched pointers to matrices, with\nop( A ) an m by k by batch_count batched matrix,\nop( B ) a k by n by batch_count batched matrix and\nC and D are m by n by batch_count batched matrices.\nThe batched matrices are an array of pointers to matrices.\nThe number of pointers to matrices is batch_count.\nC and D may point to the same matrices if their parameters are identical.\n\nSupported types are as follows:\n- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the scalar alpha. 
Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer storing array of pointers to each matrix A_i.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nb         [void *]\ndevice pointer storing array of pointers to each matrix B_i.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i.\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice array of device pointers to each matrix C_i.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i.\n@param[out]\nd         [void *]\ndevice array of device pointers to each matrix D_i.\nIf d and c are the same array of matrix pointers then d_type must equal c_type and ldd must equal ldc\nor the respective invalid status will be returned.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of each matrix D_i.\n@param[in]\nldd       [rocblas_int]\nspecifies the leading dimension of each D_i.\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      [rocblas_gemm_algo]\nenumerant specifying the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nif algo is rocblas_gemm_algo_solution_index, this controls which solution is used.\nWhen algo is not rocblas_gemm_algo_solution_index, or if solution_index <= 0, the default solution is used.\nThis parameter was unused in previous releases and instead always used the default solution\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
     pub fn rocblas_gemm_batched_ex(
         handle: rocblas_handle,
         transA: rocblas_operation,
@@ -9542,7 +11577,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_strided_batched_ex performs one of the strided_batched matrix-matrix operations:\n\nD_i = alpha*op(A_i)*op(B_i) + beta*C_i, for i = 1, ..., batch_count\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B, C, and D are strided_batched matrices, with\nop( A ) an m by k by batch_count strided_batched matrix,\nop( B ) a k by n by batch_count strided_batched matrix and\nC and D are m by n by batch_count strided_batched matrices.\nC and D may point to the same matrices if their parameters are identical.\n\nThe strided_batched matrices are multiple matrices separated by a constant stride.\nThe number of matrices is batch_count.\n\nSupported types are as follows:\n- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type\n\nTwo int8 datatypes are supported: int8_t and rocblas_int8x4. int8_t is the C99 signed\n8 bit integer. The default is int8_t and it is recommended int8_t be used. rocblas_int8x4\nis a packed datatype. 
The packed int 8 datatype occurs if the user sets:\n\n@code\nflags |= rocblas_gemm_flags_pack_int8x4;\n@endcode\n\nFor this packed int8 datatype matrices A and B are packed into int8x4 in the k dimension.\nThis will impose the following size restrictions on A or B:\n\n- k must be a multiple of 4\n- if transA == rocblas_operation_transpose then lda must be a multiple of 4\n- if transB == rocblas_operation_none then ldb must be a multiple of 4\n- if transA == rocblas_operation_none the matrix A must have each 4 consecutive\nvalues in the k dimension packed\n- if transB == rocblas_operation_transpose the matrix B must have each 4\nconsecutive values in the k dimension packed.\n\nThis packing can be achieved with the following pseudo-code. The code assumes the\noriginal matrices are in A and B, and the packed matrices are A_packed and B_packed.\nThe size of the A_packed and B_packed are the same as the size of the A and B respectively.\n\n@code\nif(transA == rocblas_operation_none)\n{\nint nb = 4;\nfor(int i_m = 0; i_m < m; i_m++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nA_packed[i_k % nb + (i_m + (i_k / nb) * lda) * nb] = A[i_m + i_k * lda];\n}\n}\n}\nelse\n{\nA_packed = A;\n}\nif(transB == rocblas_operation_transpose)\n{\nint nb = 4;\nfor(int i_n = 0; i_n < m; i_n++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nB_packed[i_k % nb + (i_n + (i_k / nb) * ldb) * nb] = B[i_n + i_k * ldb];\n}\n}\n}\nelse\n{\nB_packed = B;\n}\n@endcode\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the scalar alpha. 
Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer pointing to first matrix A_1.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstride_a  [rocblas_stride]\nspecifies stride from start of one A_i matrix to the next A_(i + 1).\n@param[in]\nb         [void *]\ndevice pointer pointing to first matrix B_1.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i.\n@param[in]\nstride_b  [rocblas_stride]\nspecifies stride from start of one B_i matrix to the next B_(i + 1).\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice pointer pointing to first matrix C_1.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i.\n@param[in]\nstride_c  [rocblas_stride]\nspecifies stride from start of one C_i matrix to the next C_(i + 1).\n@param[out]\nd         [void *]\ndevice pointer storing each matrix D_i.\nIf d and c pointers are to the same matrix then d_type must equal c_type and ldd must equal ldc\nand stride_d must equal stride_c or the respective invalid status will be returned.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of each matrix D_i.\n@param[in]\nldd       [rocblas_int]\nspecifies the leading dimension of each D_i.\n@param[in]\nstride_d  [rocblas_stride]\nspecifies stride from start of one D_i matrix to the next D_(i + 1).\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      [rocblas_gemm_algo]\nenumerant specifying 
the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nreserved for future use.\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_strided_batched_ex performs one of the strided_batched matrix-matrix operations:\n\nD_i = alpha*op(A_i)*op(B_i) + beta*C_i, for i = 1, ..., batch_count\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B, C, and D are strided_batched matrices, with\nop( A ) an m by k by batch_count strided_batched matrix,\nop( B ) a k by n by batch_count strided_batched matrix and\nC and D are m by n by batch_count strided_batched matrices.\nC and D may point to the same matrices if their parameters are identical.\n\nThe strided_batched matrices are multiple matrices separated by a constant stride.\nThe number of matrices is batch_count.\n\nSupported types are as follows:\n- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the 
scalar alpha. Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer pointing to first matrix A_1.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstride_a  [rocblas_stride]\nspecifies stride from start of one A_i matrix to the next A_(i + 1).\n@param[in]\nb         [void *]\ndevice pointer pointing to first matrix B_1.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i.\n@param[in]\nstride_b  [rocblas_stride]\nspecifies stride from start of one B_i matrix to the next B_(i + 1).\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice pointer pointing to first matrix C_1.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i.\n@param[in]\nstride_c  [rocblas_stride]\nspecifies stride from start of one C_i matrix to the next C_(i + 1).\n@param[out]\nd         [void *]\ndevice pointer storing each matrix D_i.\nIf d and c pointers are to the same matrix then d_type must equal c_type and ldd must equal ldc\nand stride_d must equal stride_c or the respective invalid status will be returned.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of each matrix D_i.\n@param[in]\nldd       [rocblas_int]\nspecifies the leading dimension of each D_i.\n@param[in]\nstride_d  [rocblas_stride]\nspecifies stride from start of one D_i matrix to the next D_(i + 1).\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      
[rocblas_gemm_algo]\nenumerant specifying the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nif algo is rocblas_gemm_algo_solution_index, this controls which solution is used.\nWhen algo is not rocblas_gemm_algo_solution_index, or if solution_index <= 0, the default solution is used.\nThis parameter was unused in previous releases and instead always used the default solution\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
     pub fn rocblas_gemm_strided_batched_ex(
         handle: rocblas_handle,
         transA: rocblas_operation,
@@ -9577,39 +11612,286 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_ext2 performs the matrix-matrix operations:\n\nD = alpha * A * B  + beta * C,\n\nalpha and beta are scalars, and A, B, C, and D are matrices, with A a m by k\nmatrtix, B a k by n matrix, and C and D are m by n matrices. Each matrix A, B, C, D\nhas independent row and column strides.\n\nThis is a beta feature.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the scalar alpha. Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer storing matrix A.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of matrix A.\n@param[in]\nrow_stride_a [rocblas_int]\nspecifies the row stride of A.\n@param[in]\ncol_stride_a [rocblas_int]\nspecifies the column stride of A.\n@param[in]\nb         [void *]\ndevice pointer storing matrix B.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of matrix B.\n@param[in]\nrow_stride_b    [rocblas_int]\nspecifies the row stride of B.\n@param[in]\ncol_stride_b    [rocblas_int]\nspecifies the column stride of B.\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. 
Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice pointer storing matrix C.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of matrix C.\n@param[in]\nrow_stride_c [rocblas_int]\nspecifies the row stride of C.\n@param[in]\ncol_stride_c [rocblas_int]\nspecifies the column stride of C.\n@param[out]\nd         [void *]\ndevice pointer storing matrix D.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of matrix D.\n@param[in]\nrow_stride_d [rocblas_int]\nspecifies the row stride of D.\n@param[in]\ncol_stride_d [rocblas_int]\nspecifies the column stride of D.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      [rocblas_gemm_algo]\nenumerant specifying the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nreserved for future use.\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
-    pub fn rocblas_gemm_ext2(
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngemmt performs matrix-matrix operations and updates the upper or lower triangular part of the result matrix:\n\nC = alpha*op( A )*op( B ) + beta*C,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars. A, B  are general matrices and C is either an upper or lower triangular matrix, with\nop( A ) an n by k matrix, op( B ) a k by n matrix and C an n by n matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C is an upper triangular matrix\n- rocblas_fill_lower:  C is a  lower triangular matrix\n@param[in]\ntransA    [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n@param[in]\ntransB    [rocblas_operation]\n- rocblas_operation_none:    op(B) = B.\n- rocblas_operation_transpose:      op(B) = B^T\n- rocblas_operation_conjugate_transpose:  op(B) = B^H\n@param[in]\nn         [rocblas_int]\nnumber or rows of matrices op( A ), columns of op( B ), and (rows, columns) of C.\n@param[in]\nk         [rocblas_int]\nnumber of rows of matrices op( B ) and columns of op( A ).\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device pointer storing matrix A. If transa = rocblas_operation_none, then, the leading n-by-k part of the array contains the matrix A, otherwise the leading k-by-n part of the array contains the matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. If transA == rocblas_operation_none, must have lda >= max(1, n), otherwise, must have lda >= max(1, k).\n@param[in]\nB         device pointer storing matrix B. 
If transB = rocblas_operation_none, then, the leading k-by-n part of the array contains the matrix B, otherwise the leading n-by-k part of the array contains the matrix B.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B. If transB == rocblas_operation_none, must have ldb >= max(1, k), otherwise, must have ldb >= max(1, n)\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in, out]\nC         device pointer storing matrix C on the GPU. If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix C, otherwise the lower triangular part of the leading n-by-n array contains the matrix C.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C. Must have ldc >= max(1, n).\n"]
+    pub fn rocblas_sgemmt(
         handle: rocblas_handle,
-        m: rocblas_int,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
         n: rocblas_int,
         k: rocblas_int,
-        alpha: *const ::std::os::raw::c_void,
-        a: *const ::std::os::raw::c_void,
-        a_type: rocblas_datatype,
-        row_stride_a: rocblas_stride,
-        col_stride_a: rocblas_stride,
-        b: *const ::std::os::raw::c_void,
-        b_type: rocblas_datatype,
-        row_stride_b: rocblas_stride,
-        col_stride_b: rocblas_stride,
-        beta: *const ::std::os::raw::c_void,
-        c: *const ::std::os::raw::c_void,
-        c_type: rocblas_datatype,
-        row_stride_c: rocblas_stride,
-        col_stride_c: rocblas_stride,
-        d: *mut ::std::os::raw::c_void,
-        d_type: rocblas_datatype,
-        row_stride_d: rocblas_stride,
-        col_stride_d: rocblas_stride,
-        compute_type: rocblas_datatype,
-        algo: rocblas_gemm_algo,
-        solution_index: i32,
-        flags: u32,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        B: *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
     ) -> rocblas_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ntrsm_ex solves:\n\nop(A)*X = alpha*B or X*op(A) = alpha*B,\n\nwhere alpha is a scalar, X and B are m by n matrices,\nA is triangular matrix and op(A) is one of\n\nop( A ) = A   or   op( A ) = A^T   or   op( A ) = A^H.\n\nThe matrix X is overwritten on B.\n\nThis function gives the user the ability to reuse the invA matrix between runs.\nIf invA == NULL, rocblas_trsm_ex will automatically calculate invA on every run.\n\nSetting up invA:\nThe accepted invA matrix consists of the packed 128x128 inverses of the diagonal blocks of\nmatrix A, followed by any smaller diagonal block that remains.\nTo set up invA it is recommended that rocblas_trtri_batched be used with matrix A as the input.\n\nDevice memory of size 128 x k should be allocated for invA ahead of time, where k is m when\nrocblas_side_left and is n when rocblas_side_right. The actual number of elements in invA\nshould be passed as invA_size.\n\nTo begin, rocblas_trtri_batched must be called on the full 128x128-sized diagonal blocks of\nmatrix A. 
Below are the restricted parameters:\n- n = 128\n- ldinvA = 128\n- stride_invA = 128x128\n- batch_count = k / 128,\n\nThen any remaining block may be added:\n- n = k % 128\n- invA = invA + stride_invA * previous_batch_count\n- ldinvA = 128\n- batch_count = 1\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\n- rocblas_side_left:       op(A)*X = alpha*B\n- rocblas_side_right:      X*op(A) = alpha*B\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- transB:    op(A) = A.\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. n >= 0.\n\n@param[in]\nalpha   [void *]\ndevice pointer or host pointer specifying the scalar alpha. 
When alpha is\n&zero then A is not referenced, and B need not be set before\nentry.\n\n@param[in]\nA       [void *]\ndevice pointer storing matrix A.\nof dimension ( lda, k ), where k is m\nwhen rocblas_side_left and\nis n when rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\nif side = rocblas_side_right, lda >= max( 1, n ).\n\n@param[in, out]\nB       [void *]\ndevice pointer storing matrix B.\nB is of dimension ( ldb, n ).\nBefore entry, the leading m by n part of the array B must\ncontain the right-hand side matrix B, and on exit is\noverwritten by the solution matrix X.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n\n@param[in]\ninvA    [void *]\ndevice pointer storing the inverse diagonal blocks of A.\ninvA is of dimension ( ld_invA, k ), where k is m\nwhen rocblas_side_left and\nis n when rocblas_side_right.\nld_invA must be equal to 128.\n\n@param[in]\ninvA_size [rocblas_int]\ninvA_size specifies the number of elements of device memory in invA.\n\n@param[in]\ncompute_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_dgemmt(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemmt(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemmt(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngemmt_batched performs matrix-matrix operations and updates the upper or lower triangular part of the result matrix:\n\nC_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batch_count,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars. A, B  are general matrices and C is either an upper or lower triangular matrix, with\n\nop( A ) an n by k by batch_count matrices,\nop( B ) an k by n by batch_count matrices and\nC an n by n by batch_count matrices.\n\n@param[in]\nhandle    [rocblas_handle\nhandle to the rocblas library context queue.\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C is an upper triangular matrix\n- rocblas_fill_lower:  C is a  lower triangular matrix\n@param[in]\ntransA    [rocblas_operation]\n- rocblas_operation_none:    op(A_i) = A_i.\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n@param[in]\ntransB    [rocblas_operation]\n- rocblas_operation_none:    op(B_i) = B_i.\n- rocblas_operation_transpose:      op(B_i) = B_i^T\n- rocblas_operation_conjugate_transpose:  op(B_i) = B_i^H\n@param[in]\nn         [rocblas_int]\nnumber or rows of matrices op( A_i ), columns of op( B_i ), and (rows, columns) of C_i.\n@param[in]\nk         [rocblas_int]\nnumber of rows of matrices op( B_i ) and columns of op( A_i ).\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i. If transa = rocblas_operation_none, then, the leading n-by-k part of the array contains each matrix A_i, otherwise the leading k-by-n part of the array contains each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. 
If transA == rocblas_operation_none, must have lda >= max(1, n), otherwise, must have lda >= max(1, k).\n@param[in]\nB         device array of device pointers storing each matrix B_i. If transB = rocblas_operation_none, then, the leading k-by-n part of the array contains each matrix B_i, otherwise the leading n-by-k part of the array contains each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i. If transB == rocblas_operation_none, must have ldb >= max(1, k), otherwise, must have ldb >= max(1, n).\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in, out]\nC         device array of device pointers storing each matrix C_i. If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains each matrix C_i, otherwise the lower triangular part of the leading n-by-n array contains each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i. Must have ldc >= max(1, n).\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch."]
+    pub fn rocblas_sgemmt_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemmt_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemmt_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemmt_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngemmt_strided_batched performs matrix-matrix operations and updates the upper or lower triangular part of the result matrix:\n\nC_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batch_count,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars. A, B  are general matrices and C is either an upper or lower triangular matrix, with\nop( A ) an n by k by batch_count strided_batched matrix,\nop( B ) an k by n by batch_count strided_batched matrix and\nC an n by n by batch_count strided_batched matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C is an upper triangular matrix\n- rocblas_fill_lower:  C is a  lower triangular matrix\n@param[in]\ntransA    [rocblas_operation]\n- rocblas_operation_none:    op(A_i) = A_i.\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n@param[in]\ntransB    [rocblas_operation]\n- rocblas_operation_none:    op(B_i) = B_i.\n- rocblas_operation_transpose:      op(B_i) = B_i^T\n- rocblas_operation_conjugate_transpose:  op(B_i) = B_i^H\n@param[in]\nn         [rocblas_int]\nnumber or rows of matrices op( A_i ), columns of op( B_i ), and (rows, columns) of C_i.\n@param[in]\nk         [rocblas_int]\nnumber of rows of matrices op( B_i ) and columns of op( A_i ).\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i. If transa = rocblas_operation_none, then, the leading n-by-k part of the array contains each matrix A_i, otherwise the leading k-by-n part of the array contains each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. 
If transA == rocblas_operation_none, must have lda >= max(1, n), otherwise, must have lda >= max(1, k).\n@param[in]\nstride_a  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_(i + 1).\n@param[in]\nB         device array of device pointers storing each matrix B_i. If transB = rocblas_operation_none, then, the leading k-by-n part of the array contains each matrix B_i, otherwise the leading n-by-k part of the array contains each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i. If transB == rocblas_operation_none, must have ldb >= max(1, k), otherwise, must have ldb >= max(1, n).\n@param[in]\nstride_b  [rocblas_stride]\nstride from the start of one B_i matrix to the next B_(i + 1).\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in, out]\nC         device array of device pointers storing each matrix C_i. If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains each matrix C_i, otherwise the lower triangular part of the leading n-by-n array contains each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i. Must have ldc >= max(1, n).\n@param[in]\nstride_c  [rocblas_stride]\nstride from the start of one C_i matrix to the next C_(i + 1).\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch.\n"]
+    pub fn rocblas_sgemmt_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemmt_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemmt_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemmt_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngeam_ex performs one of the matrix-matrix operations:\n\nDij = min(alpha * (Aik + Bkj), beta * Cij)\nDij = min(alpha * Aik, alpha * Bkj) + beta * Cij\n\nalpha and beta are scalars, and A, B, C, and D are matrices, with\nop( A ) an m by k matrix, op( B ) a k by n matrix and C and D are m by n matrices.\nC and D may point to the same matrix if their type and leading dimensions are identical.\n\nAik refers to the element at the i-th row and k-th column of op( A ), Bkj refers to\nthe element at the k-th row and j-th column of op( B ), and Cij/Dij refers to the element\nat the i-th row and j-th column of C/D.\n\nSupported types are as follows:\n- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the scalar alpha. 
Same datatype as compute_type.\n@param[in]\nA         [void *]\ndevice pointer storing matrix A.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A\n\nif transA == N, must have lda >= max(1, m)\notherwise, must have lda >= max(1, k)\n@param[in]\nB         [void *]\ndevice pointer storing matrix B.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of matrix B.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B\n\nif transB == N, must have ldb >= max(1, k)\notherwise, must have ldb >= max(1, n)\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. Same datatype as compute_type.\n@param[in]\nC         [void *]\ndevice pointer storing matrix C.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of matrix C.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C, must have ldc >= max(1, m).\n@param[out]\nD         [void *]\ndevice pointer storing matrix D.\nIf D and C pointers are to the same matrix then d_type must equal c_type and ldd must equal ldc\nor the respective invalid status will be returned.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of matrix D.\n@param[in]\nldd       [rocblas_int]\nspecifies the leading dimension of D, must have ldd >= max(1, m).\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\ngeam_ex_op [rocblas_geam_ex_operation]\nenumerant specifying the operation type, support for rocblas_geam_ex_operation_min_plus and rocblas_geam_ex_operation_plus_min.\n"]
+    pub fn rocblas_geam_ex(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const ::std::os::raw::c_void,
+        A: *const ::std::os::raw::c_void,
+        a_type: rocblas_datatype,
+        lda: rocblas_int,
+        B: *const ::std::os::raw::c_void,
+        b_type: rocblas_datatype,
+        ldb: rocblas_int,
+        beta: *const ::std::os::raw::c_void,
+        C: *const ::std::os::raw::c_void,
+        c_type: rocblas_datatype,
+        ldc: rocblas_int,
+        D: *mut ::std::os::raw::c_void,
+        d_type: rocblas_datatype,
+        ldd: rocblas_int,
+        compute_type: rocblas_datatype,
+        geam_ex_op: rocblas_geam_ex_operation,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ntrsm_ex solves:\n\nop(A)*X = alpha*B or X*op(A) = alpha*B,\n\nwhere alpha is a scalar, X and B are m by n matrices,\nA is triangular matrix and op(A) is one of\n\nop( A ) = A   or   op( A ) = A^T   or   op( A ) = A^H.\n\nThe matrix X is overwritten on B.\n\nThis function gives the user the ability to reuse the invA matrix between runs.\nIf invA == NULL, rocblas_trsm_ex will automatically calculate invA on every run.\n\nSetting up invA:\nThe accepted invA matrix consists of the packed 128x128 inverses of the diagonal blocks of\nmatrix A, followed by any smaller diagonal block that remains.\nTo set up invA it is recommended that rocblas_trtri_batched be used with matrix A as the input.\n\nDevice memory of size 128 x k should be allocated for invA ahead of time, where k is m when\nrocblas_side_left and is n when rocblas_side_right. The actual number of elements in invA\nshould be passed as invA_size.\n\nTo begin, rocblas_trtri_batched must be called on the full 128x128-sized diagonal blocks of\nmatrix A. 
Below are the restricted parameters:\n- n = 128\n- ldinvA = 128\n- stride_invA = 128x128\n- batch_count = k / 128,\n\nThen any remaining block may be added:\n- n = k % 128\n- invA = invA + stride_invA * previous_batch_count\n- ldinvA = 128\n- batch_count = 1\n\nAlthough not widespread, some gemm kernels used by trsm_ex may use atomic operations.\nSee Atomic Operations in the API Reference Guide for more information.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\n- rocblas_side_left:       op(A)*X = alpha*B\n- rocblas_side_right:      X*op(A) = alpha*B\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- transB:    op(A) = A.\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. n >= 0.\n\n@param[in]\nalpha   [void *]\ndevice pointer or host pointer specifying the scalar alpha. 
When alpha is\n&zero then A is not referenced, and B need not be set before\nentry.\n\n@param[in]\nA       [void *]\ndevice pointer storing matrix A.\nof dimension ( lda, k ), where k is m\nwhen rocblas_side_left and\nis n when rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\nif side = rocblas_side_right, lda >= max( 1, n ).\n\n@param[in, out]\nB       [void *]\ndevice pointer storing matrix B.\nB is of dimension ( ldb, n ).\nBefore entry, the leading m by n part of the array B must\ncontain the right-hand side matrix B, and on exit is\noverwritten by the solution matrix X.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n\n@param[in]\ninvA    [void *]\ndevice pointer storing the inverse diagonal blocks of A.\ninvA is of dimension ( ld_invA, k ), where k is m\nwhen rocblas_side_left and\nis n when rocblas_side_right.\nld_invA must be equal to 128.\n\n@param[in]\ninvA_size [rocblas_int]\ninvA_size specifies the number of elements of device memory in invA.\n\n@param[in]\ncompute_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_trsm_ex(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -9677,7 +11959,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\naxpy_ex   computes constant alpha multiplied by vector x, plus vector y.\n\ny := alpha * x + y\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------------------\n| alpha_type | x_type | y_type | execution_type |\n|------------|--------|--------|----------------|\n|  f16_r     | f16_r  |  f16_r |      f16_r     |\n|  f16_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f32_r  |  f32_r |      f32_r     |\n|  f64_r     | f64_r  |  f64_r |      f64_r     |\n|  f32_c     | f32_c  |  f32_c |      f32_c     |\n|  f64_c     | f64_c  |  f64_c |      f64_c     |\n-------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\naxpy_ex   computes constant alpha multiplied by vector x, plus vector y.\n\ny := alpha * x + y\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------------------\n| alpha_type | x_type | y_type | execution_type |\n|------------|--------|--------|----------------|\n|  bf16_r    | bf16_r |  bf16_r|      f32_r     |\n|  f32_r     | bf16_r |  bf16_r|      f32_r     |\n|  f16_r     | f16_r  |  f16_r |      f16_r     |\n|  f16_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f32_r  |  f32_r |      f32_r     |\n|  f64_r     | f64_r  |  f64_r |      f64_r     |\n|  f32_c     | f32_c  |  f32_c |      f32_c     |\n|  f64_c     | f64_c  |  f64_c |      f64_c     |\n-------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in, out]\ny         device pointer storing vector y.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_axpy_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9694,7 +11976,23 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\naxpy_batched_ex   computes constant alpha multiplied by vector x, plus vector y over\na set of batched vectors.\n\ny := alpha * x + y\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------------------\n| alpha_type | x_type | y_type | execution_type |\n|------------|--------|--------|----------------|\n|  f16_r     | f16_r  |  f16_r |      f16_r     |\n|  f16_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f32_r  |  f32_r |      f32_r     |\n|  f64_r     | f64_r  |  f64_r |      f64_r     |\n|  f32_c     | f32_c  |  f32_c |      f32_c     |\n|  f64_c     | f64_c  |  f64_c |      f64_c     |\n-------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_axpy_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::std::os::raw::c_void,
+        alpha_type: rocblas_datatype,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *mut ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\naxpy_batched_ex   computes constant alpha multiplied by vector x, plus vector y over\na set of batched vectors.\n\ny := alpha * x + y\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------------------\n| alpha_type | x_type | y_type | execution_type |\n|------------|--------|--------|----------------|\n|  bf16_r    | bf16_r |  bf16_r|      f32_r     |\n|  f32_r     | bf16_r |  bf16_r|      f32_r     |\n|  f16_r     | f16_r  |  f16_r |      f16_r     |\n|  f16_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f32_r  |  f32_r |      f32_r     |\n|  f64_r     | f64_r  |  f64_r |      f64_r     |\n|  f32_c     | f32_c  |  f32_c |      f32_c     |\n|  f64_c     | f64_c  |  f64_c |      f64_c     |\n-------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in, out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_axpy_batched_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9712,7 +12010,24 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\naxpy_strided_batched_ex   computes constant alpha multiplied by vector x, plus vector y over\na set of strided batched vectors.\n\ny := alpha * x + y\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------------------\n| alpha_type | x_type | y_type | execution_type |\n|------------|--------|--------|----------------|\n|  f16_r     | f16_r  |  f16_r |      f16_r     |\n|  f16_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f32_r  |  f32_r |      f32_r     |\n|  f64_r     | f64_r  |  f64_r |      f64_r     |\n|  f32_c     | f32_c  |  f32_c |      f32_c     |\n|  f64_c     | f64_c  |  f64_c |      f64_c     |\n-------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) to the next one (x_i+1).\nThere are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. 
For a typical\ncase this means stridex >= n * incx.\n@param[inout]\ny         device pointer to the first vector y_1.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstridey   [rocblas_stride]\nstride from the start of one vector (y_i) to the next one (y_i+1).\nThere are no restrictions placed on stridey. However, ensure that stridey is of appropriate size. For a typical\ncase this means stridey >= n * incy.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_axpy_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::std::os::raw::c_void,
+        alpha_type: rocblas_datatype,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *mut ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\naxpy_strided_batched_ex   computes constant alpha multiplied by vector x, plus vector y over\na set of strided batched vectors.\n\ny := alpha * x + y\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------------------\n| alpha_type | x_type | y_type | execution_type |\n|------------|--------|--------|----------------|\n|  bf16_r    | bf16_r |  bf16_r|      f32_r     |\n|  f32_r     | bf16_r |  bf16_r|      f32_r     |\n|  f16_r     | f16_r  |  f16_r |      f16_r     |\n|  f16_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f32_r  |  f32_r |      f32_r     |\n|  f64_r     | f64_r  |  f64_r |      f64_r     |\n|  f32_c     | f32_c  |  f32_c |      f32_c     |\n|  f64_c     | f64_c  |  f64_c |      f64_c     |\n-------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) to the next one (x_i+1).\nThere are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. 
For a typical\ncase this means stridex >= n * incx.\n@param[in, out]\ny         device pointer to the first vector y_1.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstridey   [rocblas_stride]\nstride from the start of one vector (y_i) to the next one (y_i+1).\nThere are no restrictions placed on stridey. However, ensure that stridey is of appropriate size. For a typical\ncase this means stridey >= n * incy.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_axpy_strided_batched_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9732,7 +12047,26 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ndot_ex  performs the dot product of vectors x and y.\n\nresult = x * y;\n\ndotc_ex  performs the dot product of the conjugate of complex vector x and complex vector y\n\nresult = conjugate (x) * y;\n\nCurrently supported datatypes are as follows:\n\n--------------------------------------------------\n| x_type | y_type | result_type | execution_type |\n|--------|--------|-------------|----------------|\n| f16_r  | f16_r  |    f16_r    |     f16_r      |\n| f16_r  | f16_r  |    f16_r    |     f32_r      |\n| bf16_r | bf16_r |    bf16_r   |     f32_r      |\n| f32_r  | f32_r  |    f32_r    |     f32_r      |\n| f64_r  | f64_r  |    f64_r    |     f64_r      |\n| f32_c  | f32_c  |    f32_c    |     f32_c      |\n| f64_c  | f64_c  |    f64_c    |     f64_c      |\n--------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nresult\ndevice pointer or host pointer to store the dot product.\nreturn is 0.0 if n <= 0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_axpy_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::std::os::raw::c_void,
+        alpha_type: rocblas_datatype,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ndot_ex  performs the dot product of vectors x and y.\n\nresult = x * y;\n\ndotc_ex  performs the dot product of the conjugate of complex vector x and complex vector y\n\nresult = conjugate (x) * y;\n\nCurrently supported datatypes are as follows:\n\n--------------------------------------------------\n| x_type | y_type | result_type | execution_type |\n|--------|--------|-------------|----------------|\n| f16_r  | f16_r  |    f16_r    |     f16_r      |\n| f16_r  | f16_r  |    f16_r    |     f32_r      |\n| bf16_r | bf16_r |    bf16_r   |     f32_r      |\n| f32_r  | f32_r  |    f32_r    |     f32_r      |\n| f64_r  | f64_r  |    f64_r    |     f64_r      |\n| f32_c  | f32_c  |    f32_c    |     f32_c      |\n| f64_c  | f64_c  |    f64_c    |     f64_c      |\n--------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in, out]\nresult\ndevice pointer or host pointer to store the dot product.\nreturn is 0.0 if n <= 0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_dot_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9765,7 +12099,39 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ndot_batched_ex performs a batch of dot products of vectors x and y.\n\nresult_i = x_i * y_i;\n\ndotc_batched_ex  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\n\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n--------------------------------------------------\n| x_type | y_type | result_type | execution_type |\n|--------|--------|-------------|----------------|\n| f16_r  | f16_r  |    f16_r    |     f16_r      |\n| f16_r  | f16_r  |    f16_r    |     f32_r      |\n| bf16_r | bf16_r |    bf16_r   |     f32_r      |\n| f32_r  | f32_r  |    f32_r    |     f32_r      |\n| f64_r  | f64_r  |    f64_r    |     f64_r      |\n| f32_c  | f32_c  |    f32_c    |     f32_c      |\n| f64_c  | f64_c  |    f64_c    |     f64_c      |\n--------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[inout]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the 
datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_dot_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *const ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        result: *mut ::std::os::raw::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dotc_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *const ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        result: *mut ::std::os::raw::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ndot_batched_ex performs a batch of dot products of vectors x and y.\n\nresult_i = x_i * y_i;\n\ndotc_batched_ex  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\n\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n--------------------------------------------------\n| x_type | y_type | result_type | execution_type |\n|--------|--------|-------------|----------------|\n| f16_r  | f16_r  |    f16_r    |     f16_r      |\n| f16_r  | f16_r  |    f16_r    |     f32_r      |\n| bf16_r | bf16_r |    bf16_r   |     f32_r      |\n| f32_r  | f32_r  |    f32_r    |     f32_r      |\n| f64_r  | f64_r  |    f64_r    |     f64_r      |\n| f32_c  | f32_c  |    f32_c    |     f32_c      |\n| f64_c  | f64_c  |    f64_c    |     f64_c      |\n--------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in, out]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies 
the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_dot_batched_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9800,7 +12166,41 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ndot_strided_batched_ex  performs a batch of dot products of vectors x and y.\n\nresult_i = x_i * y_i;\n\ndotc_strided_batched_ex  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\n\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n--------------------------------------------------\n| x_type | y_type | result_type | execution_type |\n|--------|--------|-------------|----------------|\n| f16_r  | f16_r  |    f16_r    |     f16_r      |\n| f16_r  | f16_r  |    f16_r    |     f32_r      |\n| bf16_r | bf16_r |    bf16_r   |     f32_r      |\n| f32_r  | f32_r  |    f32_r    |     f32_r      |\n| f64_r  | f64_r  |    f64_r    |     f64_r      |\n| f32_c  | f32_c  |    f32_c    |     f32_c      |\n| f64_c  | f64_c  |    f64_c    |     f64_c      |\n--------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x    [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1)\n@param[in]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y    [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1)\n@param[in]\nbatch_count [rocblas_int]\nnumber of 
instances in the batch.\n@param[inout]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_dot_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *const ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        batch_count: i64,
+        result: *mut ::std::os::raw::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dotc_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *const ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        batch_count: i64,
+        result: *mut ::std::os::raw::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ndot_strided_batched_ex  performs a batch of dot products of vectors x and y.\n\nresult_i = x_i * y_i;\n\ndotc_strided_batched_ex  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\n\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n--------------------------------------------------\n| x_type | y_type | result_type | execution_type |\n|--------|--------|-------------|----------------|\n| f16_r  | f16_r  |    f16_r    |     f16_r      |\n| f16_r  | f16_r  |    f16_r    |     f32_r      |\n| bf16_r | bf16_r |    bf16_r   |     f32_r      |\n| f32_r  | f32_r  |    f32_r    |     f32_r      |\n| f64_r  | f64_r  |    f64_r    |     f64_r      |\n| f32_c  | f32_c  |    f32_c    |     f32_c      |\n| f64_c  | f64_c  |    f64_c    |     f64_c      |\n--------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x    [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1)\n@param[in]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y    [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1)\n@param[in]\nbatch_count [rocblas_int]\nnumber of 
instances in the batch.\n@param[in, out]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_dot_strided_batched_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9839,7 +12239,45 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief BLAS_EX API\n\n\\details\nnrm2_ex computes the euclidean norm of a real or complex vector.\n\nresult := sqrt( x'*x ) for real vectors\nresult := sqrt( x**H*x ) for complex vectors\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------\n|  x_type | result | execution_type |\n|---------|--------|----------------|\n|  f16_r  |  f16_r |     f32_r      |\n|  f32_r  |  f32_r |     f32_r      |\n|  f64_r  |  f64_r |     f64_r      |\n|  f32_c  |  f32_r |     f32_r      |\n|  f64_c  |  f64_r |     f64_r      |\n-------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of the vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nresults\ndevice pointer or host pointer to store the nrm2 product.\nreturn is 0.0 if n, incx<=0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation."]
+    pub fn rocblas_dot_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *const ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+        result: *mut ::std::os::raw::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dotc_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *const ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+        result: *mut ::std::os::raw::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief BLAS_EX API\n\n\\details\nnrm2_ex computes the euclidean norm of a real or complex vector.\n\nresult := sqrt( x'*x ) for real vectors\nresult := sqrt( x**H*x ) for complex vectors\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------\n|  x_type | result | execution_type |\n|---------|--------|----------------|\n|  bf16_r |  bf16_r|     f32_r      |\n|  f16_r  |  f16_r |     f32_r      |\n|  f32_r  |  f32_r |     f32_r      |\n|  f64_r  |  f64_r |     f64_r      |\n|  f32_c  |  f32_r |     f32_r      |\n|  f64_c  |  f64_r |     f64_r      |\n-------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of the vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in, out]\nresults\ndevice pointer or host pointer to store the nrm2 product.\nreturn is 0.0 if n, incx<=0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_nrm2_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9853,7 +12291,20 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief BLAS_EX API\n\n\\details\nnrm2_batched_ex computes the euclidean norm over a batch of real or complex vectors.\n\nresult := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count\nresult := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------\n|  x_type | result | execution_type |\n|---------|--------|----------------|\n|  f16_r  |  f16_r |     f32_r      |\n|  f32_r  |  f32_r |     f32_r      |\n|  f64_r  |  f64_r |     f64_r      |\n|  f32_c  |  f32_r |     f32_r      |\n|  f64_c  |  f64_r |     f64_r      |\n-------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice pointer or host pointer to array of batch_count size for nrm2 results.\nreturn is 0.0 for each element if n <= 0, incx<=0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_nrm2_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        results: *mut ::std::os::raw::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief BLAS_EX API\n\n\\details\nnrm2_batched_ex computes the euclidean norm over a batch of real or complex vectors.\n\nresult := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count\nresult := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------\n|  x_type | result | execution_type |\n|---------|--------|----------------|\n|  bf16_r |  bf16_r|     f32_r      |\n|  f16_r  |  f16_r |     f32_r      |\n|  f32_r  |  f32_r |     f32_r      |\n|  f64_r  |  f64_r |     f64_r      |\n|  f32_c  |  f32_r |     f32_r      |\n|  f64_c  |  f64_r |     f64_r      |\n-------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice pointer or host pointer to array of batch_count size for nrm2 results.\nreturn is 0.0 for each element if n <= 0, incx<=0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_nrm2_batched_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9868,7 +12319,21 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief BLAS_EX API\n\n\\details\nnrm2_strided_batched_ex computes the euclidean norm over a batch of real or complex vectors.\n\nresult := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count\nresult := sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------\n|  x_type | result | execution_type |\n|---------|--------|----------------|\n|  f16_r  |  f16_r |     f32_r      |\n|  f32_r  |  f32_r |     f32_r      |\n|  f64_r  |  f64_r |     f64_r      |\n|  f32_c  |  f32_r |     f32_r      |\n|  f64_c  |  f64_r |     f64_r      |\n-------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice pointer or host pointer to array for storing contiguous batch_count results.\nreturn is 0.0 for each element if n <= 0, incx<=0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_nrm2_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        batch_count: i64,
+        results: *mut ::std::os::raw::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief BLAS_EX API\n\n\\details\nnrm2_strided_batched_ex computes the euclidean norm over a batch of real or complex vectors.\n\nresult := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count\nresult := sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------\n|  x_type | result | execution_type |\n|---------|--------|----------------|\n|  bf16_r |  bf16_r|     f32_r      |\n|  f16_r  |  f16_r |     f32_r      |\n|  f32_r  |  f32_r |     f32_r      |\n|  f64_r  |  f64_r |     f64_r      |\n|  f32_c  |  f32_r |     f32_r      |\n|  f64_c  |  f64_r |     f64_r      |\n-------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice pointer or host pointer to array for storing contiguous batch_count results.\nreturn is 0.0 for each element if n <= 0, incx<=0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_nrm2_strided_batched_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9884,7 +12349,22 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nrot_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\nIn the case where cs_type is real:\n\nx := c * x + s * y\ny := c * y - s * x\n\nIn the case where cs_type is complex, the imaginary part of c is ignored:\n\nx := real(c) * x + s * y\ny := real(c) * y - conj(s) * x\n\nCurrently supported datatypes are as follows:\n\n------------------------------------------------\n|  x_type | y_type  | cs_type | execution_type |\n|---------|---------|---------|----------------|\n|  bf16_r |  bf16_r | bf16_r  |  f32_r         |\n|  f16_r  |  f16_r  | f16_r   |  f32_r         |\n|  f32_r  |  f32_r  | f32_r   |  f32_r         |\n|  f64_r  |  f64_r  | f64_r   |  f64_r         |\n|  f32_c  |  f32_c  | f32_c   |  f32_c         |\n|  f32_c  |  f32_c  | f32_r   |  f32_c         |\n|  f64_c  |  f64_c  | f64_c   |  f64_c         |\n|  f64_c  |  f64_c  | f64_r   |  f64_c         |\n------------------------------------------------\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[inout]\nx       device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of x.\n@param[inout]\ny       device pointer storing vector y.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of vector y.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of y.\n@param[in]\nc       device pointer or host pointer storing scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer storing scalar sine component of the rotation matrix.\n@param[in]\ncs_type 
[rocblas_datatype]\nspecifies the datatype of c and s.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_nrm2_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+        results: *mut ::std::os::raw::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nrot_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\nIn the case where cs_type is real:\n\nx := c * x + s * y\ny := c * y - s * x\n\nIn the case where cs_type is complex, the imaginary part of c is ignored:\n\nx := real(c) * x + s * y\ny := real(c) * y - conj(s) * x\n\nCurrently supported datatypes are as follows:\n\n------------------------------------------------\n|  x_type | y_type  | cs_type | execution_type |\n|---------|---------|---------|----------------|\n|  bf16_r |  bf16_r | bf16_r  |  f32_r         |\n|  f16_r  |  f16_r  | f16_r   |  f32_r         |\n|  f32_r  |  f32_r  | f32_r   |  f32_r         |\n|  f64_r  |  f64_r  | f64_r   |  f64_r         |\n|  f32_c  |  f32_c  | f32_c   |  f32_c         |\n|  f32_c  |  f32_c  | f32_r   |  f32_c         |\n|  f64_c  |  f64_c  | f64_c   |  f64_c         |\n|  f64_c  |  f64_c  | f64_r   |  f64_c         |\n------------------------------------------------\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[in, out]\nx       device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of x.\n@param[in, out]\ny       device pointer storing vector y.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of vector y.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of y.\n@param[in]\nc       device pointer or host pointer storing scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer storing scalar sine component of the rotation matrix.\n@param[in]\ncs_type 
[rocblas_datatype]\nspecifies the datatype of c and s.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_rot_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9902,7 +12382,24 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nrot_batched_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\nIn the case where cs_type is real:\n\nx := c * x + s * y\ny := c * y - s * x\n\nIn the case where cs_type is complex, the imaginary part of c is ignored:\n\nx := real(c) * x + s * y\ny := real(c) * y - conj(s) * x\n\nCurrently supported datatypes are as follows:\n\n------------------------------------------------\n|  x_type | y_type  | cs_type | execution_type |\n|---------|---------|---------|----------------|\n|  bf16_r |  bf16_r | bf16_r  |  f32_r         |\n|  f16_r  |  f16_r  | f16_r   |  f32_r         |\n|  f32_r  |  f32_r  | f32_r   |  f32_r         |\n|  f64_r  |  f64_r  | f64_r   |  f64_r         |\n|  f32_c  |  f32_c  | f32_c   |  f32_c         |\n|  f32_c  |  f32_c  | f32_r   |  f32_c         |\n|  f64_c  |  f64_c  | f64_c   |  f64_c         |\n|  f64_c  |  f64_c  | f64_r   |  f64_c         |\n------------------------------------------------\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[inout]\nx       device array of deivce pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[inout]\ny       device array of device pointers storing each vector y_i.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation 
matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\ncs_type [rocblas_datatype]\nspecifies the datatype of c and s.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, the number of batches.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_rot_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *mut ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        c: *const ::std::os::raw::c_void,
+        s: *const ::std::os::raw::c_void,
+        cs_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nrot_batched_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\nIn the case where cs_type is real:\n\nx := c * x + s * y\ny := c * y - s * x\n\nIn the case where cs_type is complex, the imaginary part of c is ignored:\n\nx := real(c) * x + s * y\ny := real(c) * y - conj(s) * x\n\nCurrently supported datatypes are as follows:\n\n------------------------------------------------\n|  x_type | y_type  | cs_type | execution_type |\n|---------|---------|---------|----------------|\n|  bf16_r |  bf16_r | bf16_r  |  f32_r         |\n|  f16_r  |  f16_r  | f16_r   |  f32_r         |\n|  f32_r  |  f32_r  | f32_r   |  f32_r         |\n|  f64_r  |  f64_r  | f64_r   |  f64_r         |\n|  f32_c  |  f32_c  | f32_c   |  f32_c         |\n|  f32_c  |  f32_c  | f32_r   |  f32_c         |\n|  f64_c  |  f64_c  | f64_c   |  f64_c         |\n|  f64_c  |  f64_c  | f64_r   |  f64_c         |\n------------------------------------------------\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[in, out]\nx       device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in, out]\ny       device array of device pointers storing each vector y_i.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation 
matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\ncs_type [rocblas_datatype]\nspecifies the datatype of c and s.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, the number of batches.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_rot_batched_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9921,7 +12418,25 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot_strided_batched_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\nIn the case where cs_type is real:\n\nx := c * x + s * y\ny := c * y - s * x\n\nIn the case where cs_type is complex, the imaginary part of c is ignored:\n\nx := real(c) * x + s * y\ny := real(c) * y - conj(s) * x\n\nCurrently supported datatypes are as follows:\n\n------------------------------------------------\n|  x_type | y_type  | cs_type | execution_type |\n|---------|---------|---------|----------------|\n|  bf16_r |  bf16_r | bf16_r  |  f32_r         |\n|  f16_r  |  f16_r  | f16_r   |  f32_r         |\n|  f32_r  |  f32_r  | f32_r   |  f32_r         |\n|  f64_r  |  f64_r  | f64_r   |  f64_r         |\n|  f32_c  |  f32_c  | f32_c   |  f32_c         |\n|  f32_c  |  f32_c  | f32_r   |  f32_c         |\n|  f64_c  |  f64_c  | f64_c   |  f64_c         |\n|  f64_c  |  f64_c  | f64_r   |  f64_c         |\n------------------------------------------------\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[inout]\nx       device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in]\nstride_x [rocblas_stride]\nspecifies the increment from the beginning of x_i to the beginning of x_(i+1)\n@param[inout]\ny       device pointer to the first vector y_1.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each 
y_i.\n@param[in]\nstride_y [rocblas_stride]\nspecifies the increment from the beginning of y_i to the beginning of y_(i+1)\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\ncs_type [rocblas_datatype]\nspecifies the datatype of c and s.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, the number of batches.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_rot_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *mut ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        c: *const ::std::os::raw::c_void,
+        s: *const ::std::os::raw::c_void,
+        cs_type: rocblas_datatype,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot_strided_batched_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\nIn the case where cs_type is real:\n\nx := c * x + s * y\ny := c * y - s * x\n\nIn the case where cs_type is complex, the imaginary part of c is ignored:\n\nx := real(c) * x + s * y\ny := real(c) * y - conj(s) * x\n\nCurrently supported datatypes are as follows:\n\n------------------------------------------------\n|  x_type | y_type  | cs_type | execution_type |\n|---------|---------|---------|----------------|\n|  bf16_r |  bf16_r | bf16_r  |  f32_r         |\n|  f16_r  |  f16_r  | f16_r   |  f32_r         |\n|  f32_r  |  f32_r  | f32_r   |  f32_r         |\n|  f64_r  |  f64_r  | f64_r   |  f64_r         |\n|  f32_c  |  f32_c  | f32_c   |  f32_c         |\n|  f32_c  |  f32_c  | f32_r   |  f32_c         |\n|  f64_c  |  f64_c  | f64_c   |  f64_c         |\n|  f64_c  |  f64_c  | f64_r   |  f64_c         |\n------------------------------------------------\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[in, out]\nx       device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in]\nstride_x [rocblas_stride]\nspecifies the increment from the beginning of x_i to the beginning of x_(i+1)\n@param[in, out]\ny       device pointer to the first vector y_1.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each 
y_i.\n@param[in]\nstride_y [rocblas_stride]\nspecifies the increment from the beginning of y_i to the beginning of y_(i+1)\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\ncs_type [rocblas_datatype]\nspecifies the datatype of c and s.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, the number of batches.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_rot_strided_batched_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9942,7 +12457,27 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nscal_ex  scales each element of vector x with scalar alpha.\n\nx := alpha * x\n\nCurrently supported datatypes are as follows:\n\n----------------------------------------\n| alpha_type | x_type | execution_type |\n|------------|--------|----------------|\n|  f16_r     | f16_r  |     f16_r      |\n|  f16_r     | f16_r  |     f32_r      |\n|  f32_r     | f16_r  |     f32_r      |\n|  f32_r     | f32_r  |     f32_r      |\n|  f64_r     | f64_r  |     f64_r      |\n|  f32_c     | f32_c  |     f32_c      |\n|  f64_c     | f64_c  |     f64_c      |\n|  f32_r     | f32_c  |     f32_c      |\n|  f64_r     | f64_c  |     f64_c      |\n----------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[inout]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_rot_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const ::std::os::raw::c_void,
+        s: *const ::std::os::raw::c_void,
+        cs_type: rocblas_datatype,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nscal_ex  scales each element of vector x with scalar alpha.\n\nx := alpha * x\n\nCurrently supported datatypes are as follows:\n\n----------------------------------------\n| alpha_type | x_type | execution_type |\n|------------|--------|----------------|\n|  f32_r     | bf16_r |     f32_r      |\n|  bf16_r    | bf16_r |     f32_r      |\n|  f16_r     | f16_r  |     f16_r      |\n|  f16_r     | f16_r  |     f32_r      |\n|  f32_r     | f16_r  |     f32_r      |\n|  f32_r     | f32_r  |     f32_r      |\n|  f64_r     | f64_r  |     f64_r      |\n|  f32_c     | f32_c  |     f32_c      |\n|  f64_c     | f64_c  |     f64_c      |\n|  f32_r     | f32_c  |     f32_c      |\n|  f64_r     | f64_c  |     f64_c      |\n----------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in, out]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_scal_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9956,7 +12491,20 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nscal_batched_ex  scales each element of each vector x_i with scalar alpha.\n\nx_i := alpha * x_i\n\nCurrently supported datatypes are as follows:\n\n----------------------------------------\n| alpha_type | x_type | execution_type |\n|------------|--------|----------------|\n|  f16_r     | f16_r  |     f16_r      |\n|  f16_r     | f16_r  |     f32_r      |\n|  f32_r     | f16_r  |     f32_r      |\n|  f32_r     | f32_r  |     f32_r      |\n|  f64_r     | f64_r  |     f64_r      |\n|  f32_c     | f32_c  |     f32_c      |\n|  f64_c     | f64_c  |     f64_c      |\n|  f32_r     | f32_c  |     f32_c      |\n|  f64_r     | f64_c  |     f64_c      |\n----------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[inout]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_scal_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::std::os::raw::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nscal_batched_ex  scales each element of each vector x_i with scalar alpha.\n\nx_i := alpha * x_i\n\nCurrently supported datatypes are as follows:\n\n----------------------------------------\n| alpha_type | x_type | execution_type |\n|------------|--------|----------------|\n|  f32_r     | bf16_r |     f32_r      |\n|  bf16_r    | bf16_r |     f32_r      |\n|  f16_r     | f16_r  |     f16_r      |\n|  f16_r     | f16_r  |     f32_r      |\n|  f32_r     | f16_r  |     f32_r      |\n|  f32_r     | f32_r  |     f32_r      |\n|  f64_r     | f64_r  |     f64_r      |\n|  f32_c     | f32_c  |     f32_c      |\n|  f64_c     | f64_c  |     f64_c      |\n|  f32_r     | f32_c  |     f32_c      |\n|  f64_r     | f64_c  |     f64_c      |\n----------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in, out]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
     pub fn rocblas_scal_batched_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9971,7 +12519,21 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nscal_strided_batched_ex  scales each element of vector x with scalar alpha over a set\nof strided batched vectors.\n\nx := alpha * x\n\nCurrently supported datatypes are as follows:\n\n----------------------------------------\n| alpha_type | x_type | execution_type |\n|------------|--------|----------------|\n|  f16_r     | f16_r  |     f16_r      |\n|  f16_r     | f16_r  |     f32_r      |\n|  f32_r     | f16_r  |     f32_r      |\n|  f32_r     | f32_r  |     f32_r      |\n|  f64_r     | f64_r  |     f64_r      |\n|  f32_c     | f32_c  |     f32_c      |\n|  f64_c     | f64_c  |     f64_c      |\n|  f32_r     | f32_c  |     f32_c      |\n|  f64_r     | f64_c  |     f64_c      |\n----------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[inout]\nx         device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) to the next one (x_i+1).\nThere are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. For a typical\ncase this means stridex >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_scal_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::std::os::raw::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nscal_strided_batched_ex  scales each element of vector x with scalar alpha over a set\nof strided batched vectors.\n\nx := alpha * x\n\nCurrently supported datatypes are as follows:\n\n----------------------------------------\n| alpha_type | x_type | execution_type |\n|------------|--------|----------------|\n|  f32_r     | bf16_r |     f32_r      |\n|  bf16_r    | bf16_r |     f32_r      |\n|  f16_r     | f16_r  |     f16_r      |\n|  f16_r     | f16_r  |     f32_r      |\n|  f32_r     | f16_r  |     f32_r      |\n|  f32_r     | f32_r  |     f32_r      |\n|  f64_r     | f64_r  |     f64_r      |\n|  f32_c     | f32_c  |     f32_c      |\n|  f64_c     | f64_c  |     f64_c      |\n|  f32_r     | f32_c  |     f32_c      |\n|  f64_r     | f64_c  |     f64_c      |\n----------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in, out]\nx         device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) to the next one (x_i+1).\nThere are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. For a typical\ncase this means stridex >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n\n"]
     pub fn rocblas_scal_strided_batched_ex(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -9985,6 +12547,21 @@ extern "C" {
         execution_type: rocblas_datatype,
     ) -> rocblas_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scal_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::std::os::raw::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
 extern "C" {
     #[doc = " BLAS Auxiliary API\n\n\\details\nrocblas_status_to_string\n\nReturns string representing rocblas_status value\n\n@param[in]\nstatus  [rocblas_status]\nrocBLAS status to convert to string"]
     pub fn rocblas_status_to_string(status: rocblas_status) -> *const ::std::os::raw::c_char;
diff --git a/rocsolver-sys/README b/rocsolver-sys/README
index ef4a25a..b80d3ba 100644
--- a/rocsolver-sys/README
+++ b/rocsolver-sys/README
@@ -1 +1 @@
-bindgen /opt/rocm/include/rocsolver/rocsolver.h -o src/rocsolver.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "rocsolver_.*" --allowlist-var "ROCSOLVER_*" --must-use-type rocblas_status -- -I/opt/rocm/include
\ No newline at end of file
+bindgen $Env:HIP_PATH/include/rocsolver/rocsolver.h -o src/rocsolver.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "rocsolver_.*" --allowlist-var "ROCSOLVER_.*" --must-use-type rocblas_status -- -I"$Env:HIP_PATH/include"
\ No newline at end of file
diff --git a/rocsolver-sys/src/rocsolver.rs b/rocsolver-sys/src/rocsolver.rs
index 3bc9285..c6e877b 100644
--- a/rocsolver-sys/src/rocsolver.rs
+++ b/rocsolver-sys/src/rocsolver.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.66.1 */
+/* automatically generated by rust-bindgen 0.69.4 */
 
 #[doc = " \\brief Used to specify the logging layer mode using a bitwise combination\nof rocblas_layer_mode values."]
 pub type rocblas_layer_mode_flags = u32;
@@ -13,7 +13,7 @@ impl rocblas_direct_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to specify the order in which multiple Householder matrices are\napplied together"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_direct_(pub ::std::os::raw::c_uint);
+pub struct rocblas_direct_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to specify the order in which multiple Householder matrices are\napplied together"]
 pub use self::rocblas_direct_ as rocblas_direct;
 impl rocblas_storev_ {
@@ -27,7 +27,7 @@ impl rocblas_storev_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to specify how householder vectors are stored in a matrix of\nvectors"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_storev_(pub ::std::os::raw::c_uint);
+pub struct rocblas_storev_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to specify how householder vectors are stored in a matrix of\nvectors"]
 pub use self::rocblas_storev_ as rocblas_storev;
 impl rocblas_svect_ {
@@ -49,7 +49,7 @@ impl rocblas_svect_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to specify how the singular vectors are to be computed and\nstored"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_svect_(pub ::std::os::raw::c_uint);
+pub struct rocblas_svect_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to specify how the singular vectors are to be computed and\nstored"]
 pub use self::rocblas_svect_ as rocblas_svect;
 impl rocblas_workmode_ {
@@ -63,7 +63,7 @@ impl rocblas_workmode_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to enable the use of fast algorithms (with out-of-place\ncomputations) in some of the routines"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_workmode_(pub ::std::os::raw::c_uint);
+pub struct rocblas_workmode_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to enable the use of fast algorithms (with out-of-place\ncomputations) in some of the routines"]
 pub use self::rocblas_workmode_ as rocblas_workmode;
 impl rocblas_evect_ {
@@ -81,7 +81,7 @@ impl rocblas_evect_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to specify how the eigenvectors are to be computed"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_evect_(pub ::std::os::raw::c_uint);
+pub struct rocblas_evect_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to specify how the eigenvectors are to be computed"]
 pub use self::rocblas_evect_ as rocblas_evect;
 impl rocblas_eform_ {
@@ -99,7 +99,7 @@ impl rocblas_eform_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to specify the form of the generalized eigenproblem"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_eform_(pub ::std::os::raw::c_uint);
+pub struct rocblas_eform_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to specify the form of the generalized eigenproblem"]
 pub use self::rocblas_eform_ as rocblas_eform;
 impl rocblas_erange_ {
@@ -117,7 +117,7 @@ impl rocblas_erange_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to specify the type of range in which eigenvalues will be found\nin partial eigenvalue decompositions"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_erange_(pub ::std::os::raw::c_uint);
+pub struct rocblas_erange_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to specify the type of range in which eigenvalues will be found\nin partial eigenvalue decompositions"]
 pub use self::rocblas_erange_ as rocblas_erange;
 impl rocblas_eorder_ {
@@ -131,7 +131,7 @@ impl rocblas_eorder_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to specify whether the eigenvalues are grouped and ordered by blocks"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_eorder_(pub ::std::os::raw::c_uint);
+pub struct rocblas_eorder_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to specify whether the eigenvalues are grouped and ordered by blocks"]
 pub use self::rocblas_eorder_ as rocblas_eorder;
 impl rocblas_esort_ {
@@ -145,7 +145,7 @@ impl rocblas_esort_ {
 #[repr(transparent)]
 #[doc = " \\brief Used in the Jacobi methods to specify whether the eigenvalues are sorted\nin increasing order"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_esort_(pub ::std::os::raw::c_uint);
+pub struct rocblas_esort_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used in the Jacobi methods to specify whether the eigenvalues are sorted\nin increasing order"]
 pub use self::rocblas_esort_ as rocblas_esort;
 impl rocblas_srange_ {
@@ -163,7 +163,7 @@ impl rocblas_srange_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to specify the type of range in which singular values will be found\nin partial singular value decompositions"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_srange_(pub ::std::os::raw::c_uint);
+pub struct rocblas_srange_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to specify the type of range in which singular values will be found\nin partial singular value decompositions"]
 pub use self::rocblas_srange_ as rocblas_srange;
 #[doc = " \\brief Forward-declaration of opaque struct containing data used for the re-factorization interfaces."]
@@ -174,6 +174,18 @@ pub struct rocsolver_rfinfo_ {
 }
 #[doc = " \\brief A handle to a structure containing matrix descriptors and metadata required to interact\nwith rocSPARSE when using the rocSOLVER re-factorization functionality. It needs to be initialized\nwith \\ref rocsolver_create_rfinfo and destroyed with \\ref rocsolver_destroy_rfinfo."]
 pub type rocsolver_rfinfo = *mut rocsolver_rfinfo_;
+impl rocsolver_rfinfo_mode_ {
+    pub const rocsolver_rfinfo_mode_lu: rocsolver_rfinfo_mode_ = rocsolver_rfinfo_mode_(271);
+}
+impl rocsolver_rfinfo_mode_ {
+    pub const rocsolver_rfinfo_mode_cholesky: rocsolver_rfinfo_mode_ = rocsolver_rfinfo_mode_(272);
+}
+#[repr(transparent)]
+#[doc = " \\brief Used to specify the mode of the rfinfo struct required by the re-factorization functionality."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocsolver_rfinfo_mode_(pub ::std::os::raw::c_int);
+#[doc = " \\brief Used to specify the mode of the rfinfo struct required by the re-factorization functionality."]
+pub use self::rocsolver_rfinfo_mode_ as rocsolver_rfinfo_mode;
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct _rocblas_handle {
@@ -189,6 +201,7 @@ pub struct ihipStream_t {
 #[doc = " \\brief Forward declaration of hipStream_t"]
 pub type hipStream_t = *mut ihipStream_t;
 pub type rocblas_int = i32;
+#[doc = " \\brief Stride between matrices or vectors in strided_batched functions"]
 pub type rocblas_stride = i64;
 #[doc = " \\brief Struct to represent a complex number with single precision real and imaginary parts."]
 #[repr(C)]
@@ -218,7 +231,7 @@ impl rocblas_operation_ {
 #[repr(transparent)]
 #[doc = " \\brief Used to specify whether the matrix is to be transposed or not."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_operation_(pub ::std::os::raw::c_uint);
+pub struct rocblas_operation_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used to specify whether the matrix is to be transposed or not."]
 pub use self::rocblas_operation_ as rocblas_operation;
 impl rocblas_fill_ {
@@ -235,7 +248,7 @@ impl rocblas_fill_ {
 #[repr(transparent)]
 #[doc = " \\brief Used by the Hermitian, symmetric and triangular matrix\n routines to specify whether the upper, or lower triangle is being referenced."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_fill_(pub ::std::os::raw::c_uint);
+pub struct rocblas_fill_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Used by the Hermitian, symmetric and triangular matrix\n routines to specify whether the upper, or lower triangle is being referenced."]
 pub use self::rocblas_fill_ as rocblas_fill;
 impl rocblas_diagonal_ {
@@ -249,7 +262,7 @@ impl rocblas_diagonal_ {
 #[repr(transparent)]
 #[doc = " \\brief It is used by the triangular matrix routines to specify whether the\n matrix is unit triangular."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_diagonal_(pub ::std::os::raw::c_uint);
+pub struct rocblas_diagonal_(pub ::std::os::raw::c_int);
 #[doc = " \\brief It is used by the triangular matrix routines to specify whether the\n matrix is unit triangular."]
 pub use self::rocblas_diagonal_ as rocblas_diagonal;
 impl rocblas_side_ {
@@ -266,7 +279,7 @@ impl rocblas_side_ {
 #[repr(transparent)]
 #[doc = " \\brief Indicates the side matrix A is located relative to matrix B during multiplication."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_side_(pub ::std::os::raw::c_uint);
+pub struct rocblas_side_(pub ::std::os::raw::c_int);
 #[doc = " \\brief Indicates the side matrix A is located relative to matrix B during multiplication."]
 pub use self::rocblas_side_ as rocblas_side;
 impl rocblas_status_ {
@@ -324,10 +337,16 @@ impl rocblas_status_ {
 impl rocblas_status_ {
     pub const rocblas_status_check_numerics_fail: rocblas_status_ = rocblas_status_(13);
 }
+impl rocblas_status_ {
+    pub const rocblas_status_excluded_from_build: rocblas_status_ = rocblas_status_(14);
+}
+impl rocblas_status_ {
+    pub const rocblas_status_arch_mismatch: rocblas_status_ = rocblas_status_(15);
+}
 #[repr(transparent)]
 #[doc = "   @brief rocblas status codes definition"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocblas_status_(pub ::std::os::raw::c_uint);
+pub struct rocblas_status_(pub ::std::os::raw::c_int);
 #[doc = "   @brief rocblas status codes definition"]
 pub use self::rocblas_status_ as rocblas_status;
 pub type rocsolver_int = rocblas_int;
@@ -409,7 +428,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\brief GET_VERSION_STRING_SIZE Queries the minimum buffer size for a\nsuccessful call to \\ref rocsolver_get_version_string.\n\n\\details\n@param[out]\nlen         pointer to size_t.\\n\nThe minimum length of buffer to pass to\n\\ref rocsolver_get_version_string."]
+    #[doc = " \\brief GET_VERSION_STRING_SIZE Queries the minimum buffer size for a\nsuccessful call to \\ref rocsolver_get_version_string.\n\n\\details\n@param[out]\nlen         pointer to size_t.\nThe minimum length of buffer to pass to\n\\ref rocsolver_get_version_string."]
     pub fn rocsolver_get_version_string_size(len: *mut usize) -> rocblas_status;
 }
 extern "C" {
@@ -424,12 +443,12 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\brief LOG_SET_LAYER_MODE sets the logging mode for the rocSOLVER multi-level\nlogging environment.\n\n\\details\n@param[in]\nlayer_mode  rocblas_layer_mode_flags.\\n\nSpecifies the logging mode."]
+    #[doc = " \\brief LOG_SET_LAYER_MODE sets the logging mode for the rocSOLVER multi-level\nlogging environment.\n\n\\details\n@param[in]\nlayer_mode  rocblas_layer_mode_flags.\nSpecifies the logging mode."]
     pub fn rocsolver_log_set_layer_mode(layer_mode: rocblas_layer_mode_flags) -> rocblas_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\brief LOG_SET_MAX_LEVELS sets the maximum trace log depth for the rocSOLVER\nmulti-level logging environment.\n\n\\details\n@param[in]\nmax_levels  rocblas_int. max_levels >= 1.\\n\nSpecifies the maximum depth at which nested function calls\nwill appear in the trace and profile logs."]
+    #[doc = " \\brief LOG_SET_MAX_LEVELS sets the maximum trace log depth for the rocSOLVER\nmulti-level logging environment.\n\n\\details\n@param[in]\nmax_levels  rocblas_int. max_levels >= 1.\nSpecifies the maximum depth at which nested function calls\nwill appear in the trace and profile logs."]
     pub fn rocsolver_log_set_max_levels(max_levels: rocblas_int) -> rocblas_status;
 }
 extern "C" {
@@ -449,7 +468,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief LACGV conjugates the complex vector x.\n\n\\details\nIt conjugates the n entries of a complex vector x with increment incx.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe dimension of vector x.\n@param[inout]\nx           pointer to type. Array on the GPU of size at least n (size depends on the value of incx).\\n\nOn entry, the vector x.\nOn exit, each entry is overwritten with its conjugate value.\n@param[in]\nincx        rocblas_int. incx != 0.\\n\nThe distance between two consecutive elements of x.\nIf incx is negative, the elements of x are indexed in\nreverse order."]
+    #[doc = " @{\n\\brief LACGV conjugates the complex vector x.\n\n\\details\nIt conjugates the n entries of a complex vector x with increment incx.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe dimension of vector x.\n@param[inout]\nx           pointer to type. Array on the GPU of size at least n (size depends on the value of incx).\nOn entry, the vector x.\nOn exit, each entry is overwritten with its conjugate value.\n@param[in]\nincx        rocblas_int. incx != 0.\nThe distance between two consecutive elements of x.\nIf incx is negative, the elements of x are indexed in\nreverse order."]
     pub fn rocsolver_clacgv(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -468,7 +487,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief LASWP performs a series of row interchanges on the matrix A.\n\n\\details\nRow interchanges are done one by one. If \\f$\\text{ipiv}[k_1 + (j - k_1) \\cdot \\text{abs}(\\text{incx})] = r\\f$, then the j-th row of A\nwill be interchanged with the r-th row of A, for \\f$j = k_1,k_1+1,\\dots,k_2\\f$. Indices \\f$k_1\\f$ and \\f$k_2\\f$ are 1-based indices.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n. \\n\nOn entry, the matrix to which the row\ninterchanges will be applied. On exit, the resulting permuted matrix.\n@param[in]\nlda         rocblas_int. lda > 0.\\n\nThe leading dimension of the array A.\n@param[in]\nk1          rocblas_int. k1 > 0.\\n\nThe k_1 index. It is the first element of ipiv for which a row interchange will\nbe done. This is a 1-based index.\n@param[in]\nk2          rocblas_int. k2 > k1 > 0.\\n\nThe k_2 index. k_2 - k_1 + 1 is the number of elements of ipiv for which a row\ninterchange will be done. This is a 1-based index.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU of dimension at least k_1 + (k_2 - k_1)*abs(incx).\\n\nThe vector of pivot indices. Only the elements in positions\nk_1 through k_1 + (k_2 - k_1)*abs(incx) of this vector are accessed.\nElements of ipiv are considered 1-based.\n@param[in]\nincx        rocblas_int. incx != 0.\\n\nThe distance between successive values of ipiv.  If incx\nis negative, the pivots are applied in reverse order."]
+    #[doc = " @{\n\\brief LASWP performs a series of row interchanges on the matrix A.\n\n\\details\nRow interchanges are done one by one. If \\f$\\text{ipiv}[k_1 + (j - k_1) \\cdot \\text{abs}(\\text{incx})] = r\\f$, then the j-th row of A\nwill be interchanged with the r-th row of A, for \\f$j = k_1,k_1+1,\\dots,k_2\\f$. Indices \\f$k_1\\f$ and \\f$k_2\\f$ are 1-based indices.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix to which the row\ninterchanges will be applied. On exit, the resulting permuted matrix.\n@param[in]\nlda         rocblas_int. lda > 0.\nThe leading dimension of the array A.\n@param[in]\nk1          rocblas_int. k1 > 0.\nThe k_1 index. It is the first element of ipiv for which a row interchange will\nbe done. This is a 1-based index.\n@param[in]\nk2          rocblas_int. k2 > k1 > 0.\nThe k_2 index. k_2 - k_1 + 1 is the number of elements of ipiv for which a row\ninterchange will be done. This is a 1-based index.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU of dimension at least \\f$k_1 + (k_2 - k_1)\\cdot \\text{abs}(\\text{incx})\\f$.\nThe vector of pivot indices. Only the elements in positions\n\\f$k_1\\f$ through \\f$k_1 + (k_2 - k_1)\\cdot \\text{abs}(\\text{incx})\\f$ of this vector are accessed.\nElements of ipiv are considered 1-based.\n@param[in]\nincx        rocblas_int. incx != 0.\nThe distance between successive values of ipiv.  If incx\nis negative, the pivots are applied in reverse order."]
     pub fn rocsolver_slaswp(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -521,7 +540,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief LARFG generates a Householder reflector H of order n.\n\n\\details\nThe reflector H is such that\n\n\\f[\nH'\\left[\\begin{array}{c}\n\\text{alpha}\\\\\nx\n\\end{array}\\right]=\\left[\\begin{array}{c}\n\\text{beta}\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere x is an n-1 vector, and alpha and beta are scalars. Matrix H can be\ngenerated as\n\n\\f[\nH = I - \\text{tau}\\left[\\begin{array}{c}\n1\\\\\nv\n\\end{array}\\right]\\left[\\begin{array}{cc}\n1 & v'\n\\end{array}\\right]\n\\f]\n\nwhere v is an n-1 vector, and tau is a scalar known as the Householder scalar. The vector\n\n\\f[\n\\bar{v}=\\left[\\begin{array}{c}\n1\\\\\nv\n\\end{array}\\right]\n\\f]\n\nis the Householder vector associated with the reflection.\n\n\\note\nThe matrix H is orthogonal/unitary (i.e. \\f$H'H=HH'=I\\f$). It is symmetric when real (i.e. \\f$H^T=H\\f$), but not Hermitian when complex\n(i.e. \\f$H^H\\neq H\\f$ in general).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order (size) of reflector H.\n@param[inout]\nalpha       pointer to type. A scalar on the GPU.\\n\nOn entry, the scalar alpha.\nOn exit, it is overwritten with beta.\n@param[inout]\nx           pointer to type. Array on the GPU of size at least n-1 (size depends on the value of incx).\\n\nOn entry, the vector x,\nOn exit, it is overwritten with vector v.\n@param[in]\nincx        rocblas_int. incx > 0.\\n\nThe distance between two consecutive elements of x.\n@param[out]\ntau         pointer to type. A scalar on the GPU.\\n\nThe Householder scalar tau."]
+    #[doc = " @{\n\\brief LARFG generates a Householder reflector H of order n.\n\n\\details\nThe reflector H is such that\n\n\\f[\nH'\\left[\\begin{array}{c}\n\\text{alpha}\\\\\nx\n\\end{array}\\right]=\\left[\\begin{array}{c}\n\\text{beta}\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere x is an n-1 vector, and alpha and beta are scalars. Matrix H can be\ngenerated as\n\n\\f[\nH = I - \\text{tau}\\left[\\begin{array}{c}\n1\\\\\nv\n\\end{array}\\right]\\left[\\begin{array}{cc}\n1 & v'\n\\end{array}\\right]\n\\f]\n\nwhere v is an n-1 vector, and tau is a scalar known as the Householder scalar. The vector\n\n\\f[\n\\bar{v}=\\left[\\begin{array}{c}\n1\\\\\nv\n\\end{array}\\right]\n\\f]\n\nis the Householder vector associated with the reflection.\n\n\\note\nThe matrix H is orthogonal/unitary (i.e. \\f$H'H=HH'=I\\f$). It is symmetric when real (i.e. \\f$H^T=H\\f$), but not Hermitian when complex\n(i.e. \\f$H^H\\neq H\\f$ in general).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order (size) of reflector H.\n@param[inout]\nalpha       pointer to type. A scalar on the GPU.\nOn entry, the scalar alpha.\nOn exit, it is overwritten with beta.\n@param[inout]\nx           pointer to type. Array on the GPU of size at least n-1 (size depends on the value of incx).\nOn entry, the vector x,\nOn exit, it is overwritten with vector v.\n@param[in]\nincx        rocblas_int. incx > 0.\nThe distance between two consecutive elements of x.\n@param[out]\ntau         pointer to type. A scalar on the GPU.\nThe Householder scalar tau."]
     pub fn rocsolver_slarfg(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -566,7 +585,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief LARFT generates the triangular factor T of a block reflector H of\norder n.\n\n\\details\nThe block reflector H is defined as the product of k Householder matrices\n\n\\f[\n\\begin{array}{cl}\nH = H_1H_2\\cdots H_k & \\: \\text{if direct indicates forward direction, or} \\\\\nH = H_k\\cdots H_2H_1 & \\: \\text{if direct indicates backward direction}\n\\end{array}\n\\f]\n\nThe triangular factor T is upper triangular in the forward direction and lower triangular in the backward direction.\nIf storev is column-wise, then\n\n\\f[\nH = I - VTV'\n\\f]\n\nwhere the i-th column of matrix V contains the Householder vector associated with \\f$H_i\\f$. If storev is row-wise, then\n\n\\f[\nH = I - V'TV\n\\f]\n\nwhere the i-th row of matrix V contains the Householder vector associated with \\f$H_i\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ndirect      #rocblas_direct.\\n\nSpecifies the direction in which the Householder matrices are applied.\n@param[in]\nstorev      #rocblas_storev.\\n\nSpecifies how the Householder vectors are stored in matrix V.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order (size) of the block reflector.\n@param[in]\nk           rocblas_int. k >= 1.\\n\nThe number of Householder matrices forming H.\n@param[in]\nV           pointer to type. Array on the GPU of size ldv*k if column-wise, or ldv*n if row-wise.\\n\nThe matrix of Householder vectors.\n@param[in]\nldv         rocblas_int. ldv >= n if column-wise, or ldv >= k if row-wise.\\n\nLeading dimension of V.\n@param[in]\ntau         pointer to type. Array of k scalars on the GPU.\\n\nThe vector of all the Householder scalars.\n@param[out]\nT           pointer to type. Array on the GPU of dimension ldt*k.\\n\nThe triangular factor. T is upper triangular if direct indicates forward direction, otherwise it is\nlower triangular. The rest of the array is not used.\n@param[in]\nldt         rocblas_int. ldt >= k.\\n\nThe leading dimension of T."]
+    #[doc = " @{\n\\brief LARFT generates the triangular factor T of a block reflector H of\norder n.\n\n\\details\nThe block reflector H is defined as the product of k Householder matrices\n\n\\f[\n\\begin{array}{cl}\nH = H(1)H(2)\\cdots H(k) & \\: \\text{if direct indicates forward direction, or} \\\\\nH = H(k)\\cdots H(2)H(1) & \\: \\text{if direct indicates backward direction}\n\\end{array}\n\\f]\n\nThe triangular factor T is upper triangular in the forward direction and lower triangular in the backward direction.\nIf storev is column-wise, then\n\n\\f[\nH = I - VTV'\n\\f]\n\nwhere the \\f$j\\f$th column of matrix V contains the Householder vector associated with \\f$H(j)\\f$. If storev is row-wise, then\n\n\\f[\nH = I - V'TV\n\\f]\n\nwhere the \\f$i\\f$th row of matrix V contains the Householder vector associated with \\f$H(i)\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ndirect      #rocblas_direct.\nSpecifies the direction in which the Householder matrices are applied.\n@param[in]\nstorev      #rocblas_storev.\nSpecifies how the Householder vectors are stored in matrix V.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order (size) of the block reflector.\n@param[in]\nk           rocblas_int. k >= 1.\nThe number of Householder matrices forming H.\n@param[in]\nV           pointer to type. Array on the GPU of size ldv*k if column-wise, or ldv*n if row-wise.\nThe matrix of Householder vectors.\n@param[in]\nldv         rocblas_int. ldv >= n if column-wise, or ldv >= k if row-wise.\nLeading dimension of V.\n@param[in]\ntau         pointer to type. Array of k scalars on the GPU.\nThe vector of all the Householder scalars.\n@param[out]\nT           pointer to type. Array on the GPU of dimension ldt*k.\nThe triangular factor. T is upper triangular if direct indicates forward direction, otherwise it is\nlower triangular. The rest of the array is not used.\n@param[in]\nldt         rocblas_int. ldt >= k.\nThe leading dimension of T."]
     pub fn rocsolver_slarft(
         handle: rocblas_handle,
         direct: rocblas_direct,
@@ -627,7 +646,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief LARF applies a Householder reflector H to a general matrix A.\n\n\\details\nThe Householder reflector H, of order m or n, is to be applied to an m-by-n matrix A\nfrom the left or the right, depending on the value of side. H is given by\n\n\\f[\nH = I - \\text{alpha}\\cdot xx'\n\\f]\n\nwhere alpha is the Householder scalar and x is a Householder vector. H is never actually computed.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nDetermines whether H is applied from the left or the right.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of A.\n@param[in]\nx           pointer to type. Array on the GPU of size at least 1 + (m-1)*abs(incx) if left side, or\nat least 1 + (n-1)*abs(incx) if right side.\\n\nThe Householder vector x.\n@param[in]\nincx        rocblas_int. incx != 0.\\n\nDistance between two consecutive elements of x.\nIf incx < 0, the elements of x are indexed in reverse order.\n@param[in]\nalpha       pointer to type. A scalar on the GPU.\\n\nThe Householder scalar. If alpha = 0, then H = I (A will remain the same; x is never used)\n@param[inout]\nA           pointer to type. Array on the GPU of size lda*n.\\n\nOn entry, the matrix A. On exit, it is overwritten with\nH*A (or A*H).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nLeading dimension of A."]
+    #[doc = " @{\n\\brief LARF applies a Householder reflector H to a general matrix A.\n\n\\details\nThe Householder reflector H, of order m or n, is to be applied to an m-by-n matrix A\nfrom the left or the right, depending on the value of side. H is given by\n\n\\f[\nH = I - \\text{alpha}\\cdot xx'\n\\f]\n\nwhere alpha is the Householder scalar and x is a Householder vector. H is never actually computed.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nDetermines whether H is applied from the left or the right.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of A.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of A.\n@param[in]\nx           pointer to type. Array on the GPU of size at least 1 + (m-1)*abs(incx) if left side, or\nat least 1 + (n-1)*abs(incx) if right side.\nThe Householder vector x.\n@param[in]\nincx        rocblas_int. incx != 0.\nDistance between two consecutive elements of x.\nIf incx < 0, the elements of x are indexed in reverse order.\n@param[in]\nalpha       pointer to type. A scalar on the GPU.\nThe Householder scalar. If alpha = 0, then H = I (A will remain the same; x is never used)\n@param[inout]\nA           pointer to type. Array on the GPU of size lda*n.\nOn entry, the matrix A. On exit, it is overwritten with\nH*A (or A*H).\n@param[in]\nlda         rocblas_int. lda >= m.\nLeading dimension of A."]
     pub fn rocsolver_slarf(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -684,7 +703,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief LARFB applies a block reflector H to a general m-by-n matrix A.\n\n\\details\nThe block reflector H is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nHA & \\: \\text{(No transpose from the left),}\\\\\nH'A & \\:  \\text{(Transpose or conjugate transpose from the left),}\\\\\nAH & \\: \\text{(No transpose from the right), or}\\\\\nAH' & \\: \\text{(Transpose or conjugate transpose from the right).}\n\\end{array}\n\\f]\n\nThe block reflector H is defined as the product of k Householder matrices as\n\n\\f[\n\\begin{array}{cl}\nH = H_1H_2\\cdots H_k & \\: \\text{if direct indicates forward direction, or} \\\\\nH = H_k\\cdots H_2H_1 & \\: \\text{if direct indicates backward direction}\n\\end{array}\n\\f]\n\nH is never stored. It is calculated as\n\n\\f[\nH = I - VTV'\n\\f]\n\nwhere the i-th column of matrix V contains the Householder vector associated with \\f$H_i\\f$, if storev is column-wise; or\n\n\\f[\nH = I - V'TV\n\\f]\n\nwhere the i-th row of matrix V contains the Householder vector associated with \\f$H_i\\f$, if storev is row-wise.\nT is the associated triangular factor as computed by \\ref rocsolver_slarft \"LARFT\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply H.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the block reflector or its transpose/conjugate transpose is to be applied.\n@param[in]\ndirect      #rocblas_direct.\\n\nSpecifies the direction in which the Householder matrices are to be applied to generate H.\n@param[in]\nstorev      #rocblas_storev.\\n\nSpecifies how the Householder vectors are stored in matrix V.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix A.\n@param[in]\nk           rocblas_int. 
k >= 1.\\n\nThe number of Householder matrices.\n@param[in]\nV           pointer to type. Array on the GPU of size ldv*k if column-wise, ldv*n if row-wise and applying from the right,\nor ldv*m if row-wise and applying from the left.\\n\nThe matrix of Householder vectors.\n@param[in]\nldv         rocblas_int. ldv >= k if row-wise, ldv >= m if column-wise and applying from the left, or ldv >= n if\ncolumn-wise and applying from the right.\\n\nLeading dimension of V.\n@param[in]\nT           pointer to type. Array on the GPU of dimension ldt*k.\\n\nThe triangular factor of the block reflector.\n@param[in]\nldt         rocblas_int. ldt >= k.\\n\nThe leading dimension of T.\n@param[inout]\nA           pointer to type. Array on the GPU of size lda*n.\\n\nOn entry, the matrix A. On exit, it is overwritten with\nH*A, A*H, H'*A, or A*H'.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nLeading dimension of A."]
+    #[doc = " @{\n\\brief LARFB applies a block reflector H to a general m-by-n matrix A.\n\n\\details\nThe block reflector H is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nHA & \\: \\text{(No transpose from the left),}\\\\\nH'A & \\:  \\text{(Transpose or conjugate transpose from the left),}\\\\\nAH & \\: \\text{(No transpose from the right), or}\\\\\nAH' & \\: \\text{(Transpose or conjugate transpose from the right).}\n\\end{array}\n\\f]\n\nThe block reflector H is defined as the product of k Householder matrices as\n\n\\f[\n\\begin{array}{cl}\nH = H(1)H(2)\\cdots H(k) & \\: \\text{if direct indicates forward direction, or} \\\\\nH = H(k)\\cdots H(2)H(1) & \\: \\text{if direct indicates backward direction}\n\\end{array}\n\\f]\n\nH is never stored. It is calculated as\n\n\\f[\nH = I - VTV'\n\\f]\n\nwhere the \\f$j\\f$th column of matrix V contains the Householder vector associated with \\f$H(j)\\f$, if storev is column-wise; or\n\n\\f[\nH = I - V'TV\n\\f]\n\nwhere the \\f$i\\f$th row of matrix V contains the Householder vector associated with \\f$H(i)\\f$, if storev is row-wise.\nT is the associated triangular factor as computed by \\ref rocsolver_slarft \"LARFT\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply H.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the block reflector or its transpose/conjugate transpose is to be applied.\n@param[in]\ndirect      #rocblas_direct.\nSpecifies the direction in which the Householder matrices are to be applied to generate H.\n@param[in]\nstorev      #rocblas_storev.\nSpecifies how the Householder vectors are stored in matrix V.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix A.\n@param[in]\nk           rocblas_int. 
k >= 1.\nThe number of Householder matrices.\n@param[in]\nV           pointer to type. Array on the GPU of size ldv*k if column-wise, ldv*n if row-wise and applying from the right,\nor ldv*m if row-wise and applying from the left.\nThe matrix of Householder vectors.\n@param[in]\nldv         rocblas_int. ldv >= k if row-wise, ldv >= m if column-wise and applying from the left, or ldv >= n if\ncolumn-wise and applying from the right.\nLeading dimension of V.\n@param[in]\nT           pointer to type. Array on the GPU of dimension ldt*k.\nThe triangular factor of the block reflector.\n@param[in]\nldt         rocblas_int. ldt >= k.\nThe leading dimension of T.\n@param[inout]\nA           pointer to type. Array on the GPU of size lda*n.\nOn entry, the matrix A. On exit, it is overwritten with\nH*A, A*H, H'*A, or A*H'.\n@param[in]\nlda         rocblas_int. lda >= m.\nLeading dimension of A."]
     pub fn rocsolver_slarfb(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -761,7 +780,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief LABRD computes the bidiagonal form of the first k rows and columns of\na general m-by-n matrix A, as well as the matrices X and Y needed to reduce\nthe remaining part of A.\n\n\\details\nThe reduced form is given by:\n\n\\f[\nB = Q'AP\n\\f]\n\nwhere the leading k-by-k block of B is upper bidiagonal if m >= n, or lower bidiagonal if m < n. Q and\nP are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H_1H_2\\cdots H_k, & \\text{and} \\\\\nP = G_1G_2\\cdots G_k.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ and \\f$G_i\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_i = I - \\text{tauq}[i]\\cdot v_iv_i', & \\text{and} \\\\\nG_i = I - \\text{taup}[i]\\cdot u_iu_i'.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i]=1\\f$;\nwhile the first i elements of the Householder vector \\f$u_i\\f$ are zero, and  \\f$u_i[i+1]=1\\f$.\nIf m < n, the first i elements of the Householder vector  \\f$v_i\\f$ are zero, and  \\f$v_i[i+1]=1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_i\\f$ are zero, and \\f$u_i[i]=1\\f$.\n\nThe unreduced part of the matrix A can be updated using the block update\n\n\\f[\nA = A - VY' - XU'\n\\f]\n\nwhere V and U are the m-by-k and n-by-k matrices formed with the vectors \\f$v_i\\f$ and \\f$u_i\\f$, respectively.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[in]\nk           rocblas_int. min(m,n) >= k >= 0.\\n\nThe number of leading rows and columns of matrix A that will be reduced.\n@param[inout]\nA           pointer to type. 
Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be reduced.\nOn exit, the first k elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n), contain the bidiagonal form B.\nIf m >= n, the elements below the diagonal of the first k columns are the possibly non-zero elements\nof the Householder vectors associated with Q, while the elements above the\nsuperdiagonal of the first k rows are the n - i - 1 possibly non-zero elements of the Householder vectors related to P.\nIf m < n, the elements below the subdiagonal of the first k columns are the m - i - 1 possibly non-zero\nelements of the Householder vectors related to Q, while the elements above the\ndiagonal of the first k rows are the n - i possibly non-zero elements of the vectors associated with P.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension k.\\n\nThe diagonal elements of B.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension k.\\n\nThe off-diagonal elements of B.\n@param[out]\ntauq        pointer to type. Array on the GPU of dimension k.\\n\nThe Householder scalars associated with matrix Q.\n@param[out]\ntaup        pointer to type. Array on the GPU of dimension k.\\n\nThe Householder scalars associated with matrix P.\n@param[out]\nX           pointer to type. Array on the GPU of dimension ldx*k.\\n\nThe m-by-k matrix needed to update the unreduced part of A.\n@param[in]\nldx         rocblas_int. ldx >= m.\\n\nThe leading dimension of X.\n@param[out]\nY           pointer to type. Array on the GPU of dimension ldy*k.\\n\nThe n-by-k matrix needed to update the unreduced part of A.\n@param[in]\nldy         rocblas_int. ldy >= n.\\n\nThe leading dimension of Y."]
+    #[doc = " @{\n\\brief LABRD computes the bidiagonal form of the first k rows and columns of\na general m-by-n matrix A, as well as the matrices X and Y needed to reduce\nthe remaining part of A.\n\n\\details\nThe reduced form is given by:\n\n\\f[\nB = Q'AP\n\\f]\n\nwhere the leading k-by-k block of B is upper bidiagonal if m >= n, or lower bidiagonal if m < n. Q and\nP are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H(1)H(2)\\cdots H(k), & \\text{and} \\\\\nP = G(1)G(2)\\cdots G(k).\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ and \\f$G(i)\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH(i) = I - \\text{tauq}[i]\\cdot v_i^{}v_i', & \\text{and} \\\\\nG(i) = I - \\text{taup}[i]\\cdot u_i^{}u_i'.\n\\end{array}\n\\f]\n\nIf m >= n, the first \\f$i-1\\f$ elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i]=1\\f$;\nwhile the first \\f$i\\f$ elements of the Householder vector \\f$u_i\\f$ are zero, and  \\f$u_i[i+1]=1\\f$.\nIf m < n, the first \\f$i\\f$ elements of the Householder vector  \\f$v_i\\f$ are zero, and  \\f$v_i[i+1]=1\\f$;\nwhile the first \\f$i-1\\f$ elements of the Householder vector \\f$u_i\\f$ are zero, and \\f$u_i[i]=1\\f$.\n\nThe unreduced part of the matrix A can be updated using the block update\n\n\\f[\nA = A - VY' - XU'\n\\f]\n\nwhere V and U are the m-by-k and n-by-k matrices formed with the vectors \\f$v_i\\f$ and \\f$u_i\\f$, respectively.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[in]\nk           rocblas_int. min(m,n) >= k >= 0.\nThe number of leading rows and columns of matrix A that will be reduced.\n@param[inout]\nA           pointer to type. 
Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be reduced.\nOn exit, the first k elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n), contain the bidiagonal form B.\nIf m >= n, the elements below the diagonal of the first k columns are the possibly non-zero elements\nof the Householder vectors associated with Q, while the elements above the\nsuperdiagonal of the first k rows are the n - i - 1 possibly non-zero elements of the Householder vectors related to P.\nIf m < n, the elements below the subdiagonal of the first k columns are the m - i - 1 possibly non-zero\nelements of the Householder vectors related to Q, while the elements above the\ndiagonal of the first k rows are the n - i possibly non-zero elements of the vectors associated with P.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension k.\nThe diagonal elements of B.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension k.\nThe off-diagonal elements of B.\n@param[out]\ntauq        pointer to type. Array on the GPU of dimension k.\nThe Householder scalars associated with matrix Q.\n@param[out]\ntaup        pointer to type. Array on the GPU of dimension k.\nThe Householder scalars associated with matrix P.\n@param[out]\nX           pointer to type. Array on the GPU of dimension ldx*k.\nThe m-by-k matrix needed to update the unreduced part of A.\n@param[in]\nldx         rocblas_int. ldx >= m.\nThe leading dimension of X.\n@param[out]\nY           pointer to type. Array on the GPU of dimension ldy*k.\nThe n-by-k matrix needed to update the unreduced part of A.\n@param[in]\nldy         rocblas_int. ldy >= n.\nThe leading dimension of Y."]
     pub fn rocsolver_slabrd(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -838,7 +857,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief LATRD computes the tridiagonal form of k rows and columns of\na symmetric/hermitian matrix A, as well as the matrix W needed to update\nthe remaining part of A.\n\n\\details\nThe reduced form is given by:\n\n\\f[\nT = Q'AQ\n\\f]\n\nIf uplo is lower, the first k rows and columns of T form the tridiagonal block. If uplo is upper, then the last\nk rows and columns of T form the tridiagonal block. Q is an orthogonal/unitary matrix represented as the\nproduct of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H_1H_2\\cdots H_k & \\text{if uplo indicates lower, or}\\\\\nQ = H_nH_{n-1}\\cdots H_{n-k+1} & \\text{if uplo is upper}.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{tau}[i]\\cdot v_iv_i'\n\\f]\n\nwhere tau[i] is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$. If uplo is upper,\nthe last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\nThe unreduced part of the matrix A can be updated using a rank update of the form:\n\n\\f[\nA = A - VW' - WV'\n\\f]\n\nwhere V is the n-by-k matrix formed by the vectors \\f$v_i\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\\n\nThe number of rows and columns of the matrix A to be reduced.\n@param[inout]\nA           pointer to type. 
Array on the GPU of dimension lda*n.\\n\nOn entry, the n-by-n matrix to be reduced.\nOn exit, if uplo is lower, the first k columns have been reduced to tridiagonal form\n(given in the diagonal elements of A and the array E), the elements below the diagonal\ncontain the possibly non-zero entries of the Householder vectors associated with Q, stored as columns.\nIf uplo is upper, the last k columns have been reduced to tridiagonal form\n(given in the diagonal elements of A and the array E), the elements above the diagonal\ncontain the possibly non-zero entries of the Householder vectors associated with Q, stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n-1.\\n\nIf upper (lower), the last (first) k elements of E are the off-diagonal elements of the\ncomputed tridiagonal block.\n@param[out]\ntau         pointer to type. Array on the GPU of dimension n-1.\\n\nIf upper (lower), the last (first) k elements of tau are the Householder scalars related to Q.\n@param[out]\nW           pointer to type. Array on the GPU of dimension ldw*k.\\n\nThe n-by-k matrix needed to update the unreduced part of A.\n@param[in]\nldw         rocblas_int. ldw >= n.\\n\nThe leading dimension of W."]
+    #[doc = " @{\n\\brief LATRD computes the tridiagonal form of k rows and columns of\na symmetric/hermitian matrix A, as well as the matrix W needed to update\nthe remaining part of A.\n\n\\details\nThe reduced form is given by:\n\n\\f[\nT = Q'AQ\n\\f]\n\nIf uplo is lower, the first k rows and columns of T form the tridiagonal block. If uplo is upper, then the last\nk rows and columns of T form the tridiagonal block. Q is an orthogonal/unitary matrix represented as the\nproduct of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H(1)H(2)\\cdots H(k) & \\text{if uplo indicates lower, or}\\\\\nQ = H(n)H(n-1)\\cdots H(n-k+1) & \\text{if uplo is upper}.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{tau}[i]\\cdot v_i^{}v_i'\n\\f]\n\nwhere tau[\\f$i\\f$] is the corresponding Householder scalar. When uplo indicates lower, the first \\f$i\\f$\nelements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$. If uplo is upper,\nthe last n-\\f$i\\f$ elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\nThe unreduced part of the matrix A can be updated using a rank update of the form:\n\n\\f[\nA = A - VW' - WV'\n\\f]\n\nwhere V is the n-by-k matrix formed by the vectors \\f$v_i\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\nThe number of rows and columns of the matrix A to be reduced.\n@param[inout]\nA           pointer to type. 
Array on the GPU of dimension lda*n.\nOn entry, the n-by-n matrix to be reduced.\nOn exit, if uplo is lower, the first k columns have been reduced to tridiagonal form\n(given in the diagonal elements of A and the array E), the elements below the diagonal\ncontain the possibly non-zero entries of the Householder vectors associated with Q, stored as columns.\nIf uplo is upper, the last k columns have been reduced to tridiagonal form\n(given in the diagonal elements of A and the array E), the elements above the diagonal\ncontain the possibly non-zero entries of the Householder vectors associated with Q, stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n-1.\nIf upper (lower), the last (first) k elements of E are the off-diagonal elements of the\ncomputed tridiagonal block.\n@param[out]\ntau         pointer to type. Array on the GPU of dimension n-1.\nIf upper (lower), the last (first) k elements of tau are the Householder scalars related to Q.\n@param[out]\nW           pointer to type. Array on the GPU of dimension ldw*k.\nThe n-by-k matrix needed to update the unreduced part of A.\n@param[in]\nldw         rocblas_int. ldw >= n.\nThe leading dimension of W."]
     pub fn rocsolver_slatrd(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -899,7 +918,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief LASYF computes a partial factorization of a symmetric matrix \\f$A\\f$\nusing Bunch-Kaufman diagonal pivoting.\n\n\\details\nThe partial factorization has the form\n\n\\f[\nA = \\left[ \\begin{array}{cc}\nI & U_{12} \\\\\n0 & U_{22}\n\\end{array} \\right] \\left[ \\begin{array}{cc}\nA_{11} & 0 \\\\\n0 & D\n\\end{array} \\right] \\left[ \\begin{array}{cc}\nI & 0 \\\\\nU_{12}^T & U_{22}^T\n\\end{array} \\right]\n\\f]\n\nor\n\n\\f[\nA = \\left[ \\begin{array}{cc}\nL_{11} & 0 \\\\\nL_{21} & I\n\\end{array} \\right] \\left[ \\begin{array}{cc}\nD & 0 \\\\\n0 & A_{22}\n\\end{array} \\right] \\left[ \\begin{array}{cc}\nL_{11}^T & L_{21}^T \\\\\n0 & I\n\\end{array} \\right]\n\\f]\n\ndepending on the value of uplo. The order of the block diagonal matrix \\f$D\\f$\nis either \\f$nb\\f$ or \\f$nb-1\\f$, and is returned in the argument \\f$kb\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[in]\nnb          rocblas_int. 2 <= nb <= n.\\n\nThe number of columns of A to be factored.\n@param[out]\nkb          pointer to a rocblas_int on the GPU.\\n\nThe number of columns of A that were actually factored (either nb or\nnb-1).\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrix A to be factored.\nOn exit, the partially factored matrix.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe vector of pivot indices. Elements of ipiv are 1-based indices.\nIf uplo is upper, then only the last kb elements of ipiv will be\nset. 
For n - kb < k <= n, if ipiv[k] > 0 then rows and columns k\nand ipiv[k] were interchanged and D[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv[k] = ipiv[k-1] < 0, then rows and columns k-1\nand -ipiv[k] were interchanged and D[k-1,k-1] to D[k,k] is a 2-by-2\ndiagonal block.\nIf uplo is lower, then only the first kb elements of ipiv will be\nset. For 1 <= k <= kb, if ipiv[k] > 0 then rows and columns k\nand ipiv[k] were interchanged and D[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv[k] = ipiv[k+1] < 0, then rows and columns k+1\nand -ipiv[k] were interchanged and D[k,k] to D[k+1,k+1] is a 2-by-2\ndiagonal block.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, D is singular. D[i,i] is the first diagonal zero."]
+    #[doc = " @{\n\\brief LASYF computes a partial factorization of a symmetric matrix \\f$A\\f$\nusing Bunch-Kaufman diagonal pivoting.\n\n\\details\nThe partial factorization has the form\n\n\\f[\nA = \\left[ \\begin{array}{cc}\nI & U_{12} \\\\\n0 & U_{22}\n\\end{array} \\right] \\left[ \\begin{array}{cc}\nA_{11} & 0 \\\\\n0 & D\n\\end{array} \\right] \\left[ \\begin{array}{cc}\nI & 0 \\\\\nU_{12}^T & U_{22}^T\n\\end{array} \\right]\n\\f]\n\nor\n\n\\f[\nA = \\left[ \\begin{array}{cc}\nL_{11} & 0 \\\\\nL_{21} & I\n\\end{array} \\right] \\left[ \\begin{array}{cc}\nD & 0 \\\\\n0 & A_{22}\n\\end{array} \\right] \\left[ \\begin{array}{cc}\nL_{11}^T & L_{21}^T \\\\\n0 & I\n\\end{array} \\right]\n\\f]\n\ndepending on the value of uplo. The order of the block diagonal matrix \\f$D\\f$\nis either \\f$nb\\f$ or \\f$nb-1\\f$, and is returned in the argument \\f$kb\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[in]\nnb          rocblas_int. 2 <= nb <= n.\nThe number of columns of A to be factored.\n@param[out]\nkb          pointer to a rocblas_int on the GPU.\nThe number of columns of A that were actually factored (either nb or\nnb-1).\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the symmetric matrix A to be factored.\nOn exit, the partially factored matrix.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe vector of pivot indices. Elements of ipiv are 1-based indices.\nIf uplo is upper, then only the last kb elements of ipiv will be\nset. 
For n - kb < k <= n, if ipiv[k] > 0 then rows and columns k\nand ipiv[k] were interchanged and D[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv[k] = ipiv[k-1] < 0, then rows and columns k-1\nand -ipiv[k] were interchanged and D[k-1,k-1] to D[k,k] is a 2-by-2\ndiagonal block.\nIf uplo is lower, then only the first kb elements of ipiv will be\nset. For 1 <= k <= kb, if ipiv[k] > 0 then rows and columns k\nand ipiv[k] were interchanged and D[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv[k] = ipiv[k+1] < 0, then rows and columns k+1\nand -ipiv[k] were interchanged and D[k,k] to D[k+1,k+1] is a 2-by-2\ndiagonal block.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, D is singular. D[i,i] is the first diagonal zero."]
     pub fn rocsolver_slasyf(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -956,7 +975,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief LAUUM computes the product of the upper (or lower) triangular part U (or L) of a\nsymmetric/Hemitian matrix A with its transpose.\n\n\\details\nIf uplo indicates upper, then \\f$UU'\\f$ is computed. If uplo indicates lower, then \\f$L'L\\f$ is computed instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower triangular part of A will be used.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not referenced.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns and rows of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n. \\n\nOn entry, it contains the upper (or lower) part of the symmetric/Hermitian matrix.\nOn exit, the upper (or lower) part is overwritten with the result of U*U' (or L'*L).\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of the array A."]
+    #[doc = " @{\n\\brief LAUUM computes the product of the upper (or lower) triangular part U (or L) of a\nsymmetric/Hermitian matrix A with its transpose.\n\n\\details\nIf uplo indicates upper, then \\f$UU'\\f$ is computed. If uplo indicates lower, then \\f$L'L\\f$ is computed instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower triangular part of A will be used.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not referenced.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns and rows of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, it contains the upper (or lower) part of the symmetric/Hermitian matrix.\nOn exit, the upper (or lower) part is overwritten with the result of U*U' (or L'*L).\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of the array A."]
     pub fn rocsolver_slauum(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -997,7 +1016,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORG2R generates an m-by-n Matrix Q with orthonormal columns.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the first n columns of the product of k Householder\nreflectors of order m\n\n\\f[\nQ = H_1H_2\\cdots H_k.\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQRF\", with the Householder vectors in the first k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\"."]
+    #[doc = " @{\n\\brief ORG2R generates an m-by-n Matrix Q with orthonormal columns.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the first n columns of the product of k Householder\nreflectors of order m\n\n\\f[\nQ = H(1)H(2)\\cdots H(k).\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from their corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQRF\", with the Householder vectors in the first k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\"."]
     pub fn rocsolver_sorg2r(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1022,7 +1041,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNG2R generates an m-by-n complex Matrix Q with orthonormal columns.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the first n columns of the product of k Householder\nreflectors of order m\n\n\\f[\nQ = H_1H_2\\cdots H_k\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQRF\", with the Householder vectors in the first k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\"."]
+    #[doc = " @{\n\\brief UNG2R generates an m-by-n complex Matrix Q with orthonormal columns.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the first n columns of the product of k Householder\nreflectors of order m\n\n\\f[\nQ = H(1)H(2)\\cdots H(k)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from their corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQRF\", with the Householder vectors in the first k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\"."]
     pub fn rocsolver_cung2r(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1047,7 +1066,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORGQR generates an m-by-n Matrix Q with orthonormal columns.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the first n columns of the product of k Householder\nreflectors of order m\n\n\\f[\nQ = H_1H_2\\cdots H_k\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQRF\", with the Householder vectors in the first k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\"."]
+    #[doc = " @{\n\\brief ORGQR generates an m-by-n Matrix Q with orthonormal columns.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the first n columns of the product of k Householder\nreflectors of order m\n\n\\f[\nQ = H(1)H(2)\\cdots H(k)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from their corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQRF\", with the Householder vectors in the first k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\"."]
     pub fn rocsolver_sorgqr(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1072,7 +1091,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNGQR generates an m-by-n complex Matrix Q with orthonormal columns.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the first n columns of the product of k Householder\nreflectors of order m\n\n\\f[\nQ = H_1H_2\\cdots H_k\n\\f]\n\nHouseholder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQRF\", with the Householder vectors in the first k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\"."]
+    #[doc = " @{\n\\brief UNGQR generates an m-by-n complex Matrix Q with orthonormal columns.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the first n columns of the product of k Householder\nreflectors of order m\n\n\\f[\nQ = H(1)H(2)\\cdots H(k)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from their corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQRF\", with the Householder vectors in the first k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\"."]
     pub fn rocsolver_cungqr(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1097,7 +1116,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORGL2 generates an m-by-n Matrix Q with orthonormal rows.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the first m rows of the product of k Householder\nreflectors of order n\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. 0 <= m <= n.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= m.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GELQF\", with the Householder vectors in the first k rows.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\"."]
+    #[doc = " @{\n\\brief ORGL2 generates an m-by-n Matrix Q with orthonormal rows.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the first m rows of the product of k Householder\nreflectors of order n\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. 0 <= m <= n.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= m.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GELQF\", with the Householder vectors in the first k rows.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\"."]
     pub fn rocsolver_sorgl2(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1122,7 +1141,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNGL2 generates an m-by-n complex Matrix Q with orthonormal rows.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the first m rows of the product of k Householder\nreflectors of order n\n\n\\f[\nQ = H_k^HH_{k-1}^H\\cdots H_1^H\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. 0 <= m <= n.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= m.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GELQF\", with the Householder vectors in the first k rows.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\"."]
+    #[doc = " @{\n\\brief UNGL2 generates an m-by-n complex Matrix Q with orthonormal rows.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the first m rows of the product of k Householder\nreflectors of order n\n\n\\f[\nQ = H(k)^HH(k-1)^H\\cdots H(1)^H\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. 0 <= m <= n.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= m.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GELQF\", with the Householder vectors in the first k rows.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\"."]
     pub fn rocsolver_cungl2(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1147,7 +1166,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORGLQ generates an m-by-n Matrix Q with orthonormal rows.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the first m rows of the product of k Householder\nreflectors of order n\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. 0 <= m <= n.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= m.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GELQF\", with the Householder vectors in the first k rows.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\"."]
+    #[doc = " @{\n\\brief ORGLQ generates an m-by-n Matrix Q with orthonormal rows.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the first m rows of the product of k Householder\nreflectors of order n\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. 0 <= m <= n.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= m.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GELQF\", with the Householder vectors in the first k rows.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\"."]
     pub fn rocsolver_sorglq(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1172,7 +1191,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNGLQ generates an m-by-n complex Matrix Q with orthonormal rows.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the first m rows of the product of k Householder\nreflectors of order n\n\n\\f[\nQ = H_k^HH_{k-1}^H\\cdots H_1^H\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. 0 <= m <= n.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= m.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GELQF\", with the Householder vectors in the first k rows.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\"."]
+    #[doc = " @{\n\\brief UNGLQ generates an m-by-n complex Matrix Q with orthonormal rows.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the first m rows of the product of k Householder\nreflectors of order n\n\n\\f[\nQ = H(k)^HH(k-1)^H\\cdots H(1)^H\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. 0 <= m <= n.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= m.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GELQF\", with the Householder vectors in the first k rows.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\"."]
     pub fn rocsolver_cunglq(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1197,7 +1216,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORG2L generates an m-by-n Matrix Q with orthonormal columns.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the last n columns of the product of k\nHouseholder reflectors of order m\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQLF\", with the Householder vectors in the last k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqlf \"GEQLF\"."]
+    #[doc = " @{\n\\brief ORG2L generates an m-by-n Matrix Q with orthonormal columns.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the last n columns of the product of k\nHouseholder reflectors of order m\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQLF\", with the Householder vectors in the last k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqlf \"GEQLF\"."]
     pub fn rocsolver_sorg2l(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1222,7 +1241,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNG2L generates an m-by-n complex Matrix Q with orthonormal columns.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the last n columns of the product of k\nHouseholder reflectors of order m\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQLF\", with the Householder vectors in the last k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqlf \"GEQLF\"."]
+    #[doc = " @{\n\\brief UNG2L generates an m-by-n complex Matrix Q with orthonormal columns.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is defined as the last n columns of the product of k\nHouseholder reflectors of order m\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQLF\", with the Householder vectors in the last k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqlf \"GEQLF\"."]
     pub fn rocsolver_cung2l(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1247,7 +1266,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORGQL generates an m-by-n Matrix Q with orthonormal columns.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the last n column of the product of k Householder\nreflectors of order m\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQLF\", with the Householder vectors in the last k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqlf \"GEQLF\"."]
+    #[doc = " @{\n\\brief ORGQL generates an m-by-n Matrix Q with orthonormal columns.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the last n columns of the product of k Householder\nreflectors of order m\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqlf \"GEQLF\", with the Householder vectors in the last k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqlf \"GEQLF\"."]
     pub fn rocsolver_sorgql(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1272,7 +1291,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNGQL generates an m-by-n complex Matrix Q with orthonormal columns.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the last n columns of the product of k\nHouseholder reflectors of order m\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\\n\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\\n\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQLF\", with the Householder vectors in the last k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqlf \"GEQLF\"."]
+    #[doc = " @{\n\\brief UNGQL generates an m-by-n complex Matrix Q with orthonormal columns.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is defined as the last n columns of the product of k\nHouseholder reflectors of order m\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix Q.\n@param[in]\nn           rocblas_int. 0 <= n <= m.\nThe number of columns of the matrix Q.\n@param[in]\nk           rocblas_int. 0 <= k <= n.\nThe number of Householder reflectors.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A as returned by \\ref rocsolver_sgeqrf \"GEQLF\", with the Householder vectors in the last k columns.\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqlf \"GEQLF\"."]
     pub fn rocsolver_cungql(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -1297,7 +1316,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORGBR generates an m-by-n Matrix Q with orthonormal rows or columns.\n\n\\details\nIf storev is column-wise, then the matrix Q has orthonormal columns. If m >= k, Q is defined as the first\nn columns of the product of k Householder reflectors of order m\n\n\\f[\nQ = H_1H_2\\cdots H_k\n\\f]\n\nIf m < k, Q is defined as the product of Householder reflectors of order m\n\n\\f[\nQ = H_1H_2\\cdots H_{m-1}\n\\f]\n\nOn the other hand, if storev is row-wise, then the matrix Q has orthonormal rows. If n > k, Q is defined as the\nfirst m rows of the product of k Householder reflectors of order n\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nIf n <= k, Q is defined as the product of Householder reflectors of order n\n\n\\f[\nQ = H_{n-1}H_{n-2}\\cdots H_1\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgebrd \"GEBRD\" in its arguments A and tauq or taup.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nstorev      #rocblas_storev.\\n\nSpecifies whether to work column-wise or row-wise.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix Q.\nIf row-wise, then min(n,k) <= m <= n.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix Q.\nIf column-wise, then min(m,k) <= n <= m.\n@param[in]\nk           rocblas_int. k >= 0.\\n\nThe number of columns (if storev is column-wise) or rows (if row-wise) of the\noriginal matrix reduced by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the Householder vectors as returned by \\ref rocsolver_sgebrd \"GEBRD\".\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. 
Array on the GPU of dimension min(m,k) if column-wise, or min(n,k) if row-wise.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\"."]
+    #[doc = " @{\n\\brief ORGBR generates an m-by-n Matrix Q with orthonormal rows or columns.\n\n\\details\nIf storev is column-wise, then the matrix Q has orthonormal columns. If m >= k, Q is defined as the first\nn columns of the product of k Householder reflectors of order m\n\n\\f[\nQ = H(1)H(2)\\cdots H(k)\n\\f]\n\nIf m < k, Q is defined as the product of Householder reflectors of order m\n\n\\f[\nQ = H(1)H(2)\\cdots H(m-1)\n\\f]\n\nOn the other hand, if storev is row-wise, then the matrix Q has orthonormal rows. If n > k, Q is defined as the\nfirst m rows of the product of k Householder reflectors of order n\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nIf n <= k, Q is defined as the product of Householder reflectors of order n\n\n\\f[\nQ = H(n-1)H(n-2)\\cdots H(1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgebrd \"GEBRD\" in its arguments A and tauq or taup.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nstorev      #rocblas_storev.\nSpecifies whether to work column-wise or row-wise.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix Q.\nIf row-wise, then min(n,k) <= m <= n.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix Q.\nIf column-wise, then min(m,k) <= n <= m.\n@param[in]\nk           rocblas_int. k >= 0.\nThe number of columns (if storev is column-wise) or rows (if row-wise) of the\noriginal matrix reduced by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the Householder vectors as returned by \\ref rocsolver_sgebrd \"GEBRD\".\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. 
Array on the GPU of dimension min(m,k) if column-wise, or min(n,k) if row-wise.\nThe Householder scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\"."]
     pub fn rocsolver_sorgbr(
         handle: rocblas_handle,
         storev: rocblas_storev,
@@ -1324,7 +1343,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNGBR generates an m-by-n complex Matrix Q with orthonormal rows or\ncolumns.\n\n\\details\nIf storev is column-wise, then the matrix Q has orthonormal columns. If m >= k, Q is defined as the first\nn columns of the product of k Householder reflectors of order m\n\n\\f[\nQ = H_1H_2\\cdots H_k\n\\f]\n\nIf m < k, Q is defined as the product of Householder reflectors of order m\n\n\\f[\nQ = H_1H_2\\cdots H_{m-1}\n\\f]\n\nOn the other hand, if storev is row-wise, then the matrix Q has orthonormal rows. If n > k, Q is defined as the\nfirst m rows of the product of k Householder reflectors of order n\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nIf n <= k, Q is defined as the product of Householder reflectors of order n\n\n\\f[\nQ = H_{n-1}H_{n-2}\\cdots H_1\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgebrd \"GEBRD\" in its arguments A and tauq or taup.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nstorev      #rocblas_storev.\\n\nSpecifies whether to work column-wise or row-wise.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix Q.\nIf row-wise, then min(n,k) <= m <= n.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix Q.\nIf column-wise, then min(m,k) <= n <= m.\n@param[in]\nk           rocblas_int. k >= 0.\\n\nThe number of columns (if storev is column-wise) or rows (if row-wise) of the\noriginal matrix reduced by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the Householder vectors as returned by \\ref rocsolver_sgebrd \"GEBRD\".\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. 
Array on the GPU of dimension min(m,k) if column-wise, or min(n,k) if row-wise.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\"."]
+    #[doc = " @{\n\\brief UNGBR generates an m-by-n complex Matrix Q with orthonormal rows or\ncolumns.\n\n\\details\nIf storev is column-wise, then the matrix Q has orthonormal columns. If m >= k, Q is defined as the first\nn columns of the product of k Householder reflectors of order m\n\n\\f[\nQ = H(1)H(2)\\cdots H(k)\n\\f]\n\nIf m < k, Q is defined as the product of Householder reflectors of order m\n\n\\f[\nQ = H(1)H(2)\\cdots H(m-1)\n\\f]\n\nOn the other hand, if storev is row-wise, then the matrix Q has orthonormal rows. If n > k, Q is defined as the\nfirst m rows of the product of k Householder reflectors of order n\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nIf n <= k, Q is defined as the product of Householder reflectors of order n\n\n\\f[\nQ = H(n-1)H(n-2)\\cdots H(1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by \\ref rocsolver_sgebrd \"GEBRD\" in its arguments A and tauq or taup.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nstorev      #rocblas_storev.\nSpecifies whether to work column-wise or row-wise.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix Q.\nIf row-wise, then min(n,k) <= m <= n.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix Q.\nIf column-wise, then min(m,k) <= n <= m.\n@param[in]\nk           rocblas_int. k >= 0.\nThe number of columns (if storev is column-wise) or rows (if row-wise) of the\noriginal matrix reduced by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the Householder vectors as returned by \\ref rocsolver_sgebrd \"GEBRD\".\nOn exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. 
Array on the GPU of dimension min(m,k) if column-wise, or min(n,k) if row-wise.\nThe Householder scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\"."]
     pub fn rocsolver_cungbr(
         handle: rocblas_handle,
         storev: rocblas_storev,
@@ -1351,7 +1370,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORGTR generates an n-by-n orthogonal Matrix Q.\n\n\\details\nQ is defined as the product of n-1 Householder reflectors of order n. If\nuplo indicates upper, then Q has the form\n\n\\f[\nQ = H_{n-1}H_{n-2}\\cdots H_1\n\\f]\n\nOn the other hand, if uplo indicates lower, then Q has the form\n\n\\f[\nQ = H_1H_2\\cdots H_{n-1}\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by\n\\ref rocsolver_ssytrd \"SYTRD\" in its arguments A and tau.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the \\ref rocsolver_ssytrd \"SYTRD\" factorization was upper or lower\ntriangular. If uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix Q.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the Householder vectors as returned\nby \\ref rocsolver_ssytrd \"SYTRD\". On exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension n-1.\\n\nThe Householder scalars as returned by \\ref rocsolver_ssytrd \"SYTRD\"."]
+    #[doc = " @{\n\\brief ORGTR generates an n-by-n orthogonal Matrix Q.\n\n\\details\nQ is defined as the product of n-1 Householder reflectors of order n. If\nuplo indicates upper, then Q has the form\n\n\\f[\nQ = H(n-1)H(n-2)\\cdots H(1)\n\\f]\n\nOn the other hand, if uplo indicates lower, then Q has the form\n\n\\f[\nQ = H(1)H(2)\\cdots H(n-1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by\n\\ref rocsolver_ssytrd \"SYTRD\" in its arguments A and tau.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the \\ref rocsolver_ssytrd \"SYTRD\" factorization was upper or lower\ntriangular. If uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix Q.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the Householder vectors as returned\nby \\ref rocsolver_ssytrd \"SYTRD\". On exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension n-1.\nThe Householder scalars as returned by \\ref rocsolver_ssytrd \"SYTRD\"."]
     pub fn rocsolver_sorgtr(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -1374,7 +1393,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNGTR generates an n-by-n unitary Matrix Q.\n\n\\details\nQ is defined as the product of n-1 Householder reflectors of order n. If\nuplo indicates upper, then Q has the form\n\n\\f[\nQ = H_{n-1}H_{n-2}\\cdots H_1\n\\f]\n\nOn the other hand, if uplo indicates lower, then Q has the form\n\n\\f[\nQ = H_1H_2\\cdots H_{n-1}\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by\n\\ref rocsolver_chetrd \"HETRD\" in its arguments A and tau.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the \\ref rocsolver_chetrd \"HETRD\" factorization was upper or lower\ntriangular. If uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix Q.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the Householder vectors as returned\nby \\ref rocsolver_chetrd \"HETRD\". On exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension n-1.\\n\nThe Householder scalars as returned by \\ref rocsolver_chetrd \"HETRD\"."]
+    #[doc = " @{\n\\brief UNGTR generates an n-by-n unitary Matrix Q.\n\n\\details\nQ is defined as the product of n-1 Householder reflectors of order n. If\nuplo indicates upper, then Q has the form\n\n\\f[\nQ = H(n-1)H(n-2)\\cdots H(1)\n\\f]\n\nOn the other hand, if uplo indicates lower, then Q has the form\n\n\\f[\nQ = H(1)H(2)\\cdots H(n-1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its\ncorresponding Householder vectors \\f$v_i\\f$ and scalars \\f$\\text{ipiv}[i]\\f$, as returned by\n\\ref rocsolver_chetrd \"HETRD\" in its arguments A and tau.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the \\ref rocsolver_chetrd \"HETRD\" factorization was upper or lower\ntriangular. If uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix Q.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the Householder vectors as returned\nby \\ref rocsolver_chetrd \"HETRD\". On exit, the computed matrix Q.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension n-1.\nThe Householder scalars as returned by \\ref rocsolver_chetrd \"HETRD\"."]
     pub fn rocsolver_cungtr(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -1397,7 +1416,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORM2R multiplies a matrix Q with orthonormal columns by a general m-by-n\nmatrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_1H_2 \\cdots H_k\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the QR factorization \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgeqrf \"GEQRF\"\nin the first k columns of its argument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, or lda >= n if side is right. \\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief ORM2R multiplies a matrix Q with orthonormal columns by a general m-by-n\nmatrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(1)H(2) \\cdots H(k)\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the QR factorization \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\nThe Householder vectors as returned by \\ref rocsolver_sgeqrf \"GEQRF\"\nin the first k columns of its argument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, or lda >= n if side is right.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_sorm2r(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1430,7 +1449,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNM2R multiplies a complex matrix Q with orthonormal columns by a\ngeneral m-by-n matrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_1H_2\\cdots H_k\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the QR factorization \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its conjugate transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgeqrf \"GEQRF\"\nin the first k columns of its argument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, or lda >= n if side is right. \\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n@param[inout]\nC           pointer to type. 
Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C.\n"]
+    #[doc = " @{\n\\brief UNM2R multiplies a complex matrix Q with orthonormal columns by a\ngeneral m-by-n matrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(1)H(2)\\cdots H(k)\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the QR factorization \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its conjugate transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\nThe Householder vectors as returned by \\ref rocsolver_sgeqrf \"GEQRF\"\nin the first k columns of its argument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, or lda >= n if side is right.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C.\n"]
     pub fn rocsolver_cunm2r(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1463,7 +1482,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORMQR multiplies a matrix Q with orthonormal columns by a general m-by-n\nmatrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_1H_2\\cdots H_k\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the QR factorization \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgeqrf \"GEQRF\"\nin the first k columns of its argument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, or lda >= n if side is right. \\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief ORMQR multiplies a matrix Q with orthonormal columns by a general m-by-n\nmatrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(1)H(2)\\cdots H(k)\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the QR factorization \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\nThe Householder vectors as returned by \\ref rocsolver_sgeqrf \"GEQRF\"\nin the first k columns of its argument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, or lda >= n if side is right.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_sormqr(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1496,7 +1515,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNMQR multiplies a complex matrix Q with orthonormal columns by a\ngeneral m-by-n matrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_1H_2\\cdots H_k\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the QR factorization \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its conjugate transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgeqrf \"GEQRF\"\nin the first k columns of its argument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, or lda >= n if side is right. \\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n@param[inout]\nC           pointer to type. 
Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief UNMQR multiplies a complex matrix Q with orthonormal columns by a\ngeneral m-by-n matrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(1)H(2)\\cdots H(k)\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the QR factorization \\ref rocsolver_sgeqrf \"GEQRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its conjugate transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\nThe Householder vectors as returned by \\ref rocsolver_sgeqrf \"GEQRF\"\nin the first k columns of its argument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, or lda >= n if side is right.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgeqrf \"GEQRF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_cunmqr(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1529,7 +1548,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORML2 multiplies a matrix Q with orthonormal rows by a general m-by-n\nmatrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the LQ factorization \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*m if side is left, or lda*n if side is right.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgelqf \"GELQF\"\nin the first k rows of its argument A.\n@param[in]\nlda         rocblas_int. lda >= k. \\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C.\n"]
+    #[doc = " @{\n\\brief ORML2 multiplies a matrix Q with orthonormal rows by a general m-by-n\nmatrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the LQ factorization \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*m if side is left, or lda*n if side is right.\nThe Householder vectors as returned by \\ref rocsolver_sgelqf \"GELQF\"\nin the first k rows of its argument A.\n@param[in]\nlda         rocblas_int. lda >= k.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C.\n"]
     pub fn rocsolver_sorml2(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1562,7 +1581,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNML2 multiplies a complex matrix Q with orthonormal rows by a general\nm-by-n matrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_k^HH_{k-1}^H\\cdots H_1^H\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the LQ factorization \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its conjugate transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*m if side is left, or lda*n if side is right.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgelqf \"GELQF\"\nin the first k rows of its argument A.\n@param[in]\nlda         rocblas_int. lda >= k. \\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\".\n@param[inout]\nC           pointer to type. 
Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief UNML2 multiplies a complex matrix Q with orthonormal rows by a general\nm-by-n matrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(k)^HH(k-1)^H\\cdots H(1)^H\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the LQ factorization \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its conjugate transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*m if side is left, or lda*n if side is right.\nThe Householder vectors as returned by \\ref rocsolver_sgelqf \"GELQF\"\nin the first k rows of its argument A.\n@param[in]\nlda         rocblas_int. lda >= k.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_cunml2(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1595,7 +1614,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORMLQ multiplies a matrix Q with orthonormal rows by a general m-by-n\nmatrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the LQ factorization \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*m if side is left, or lda*n if side is right.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgelqf \"GELQF\"\nin the first k rows of its argument A.\n@param[in]\nlda         rocblas_int. lda >= k. \\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief ORMLQ multiplies a matrix Q with orthonormal rows by a general m-by-n\nmatrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the LQ factorization \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*m if side is left, or lda*n if side is right.\nThe Householder vectors as returned by \\ref rocsolver_sgelqf \"GELQF\"\nin the first k rows of its argument A.\n@param[in]\nlda         rocblas_int. lda >= k.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_sormlq(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1628,7 +1647,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNMLQ multiplies a complex matrix Q with orthonormal rows by a general\nm-by-n matrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_k^HH_{k-1}^H\\cdots H_1^H\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the LQ factorization \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its conjugate transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*m if side is left, or lda*n if side is right.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgelqf \"GELQF\"\nin the first k rows of its argument A.\n@param[in]\nlda         rocblas_int. lda >= k. \\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\".\n@param[inout]\nC           pointer to type. 
Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief UNMLQ multiplies a complex matrix Q with orthonormal rows by a general\nm-by-n matrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(k)^HH(k-1)^H\\cdots H(1)^H\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is never stored, it is\ncalculated from the Householder vectors and scalars returned by the LQ factorization \\ref rocsolver_sgelqf \"GELQF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its conjugate transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*m if side is left, or lda*n if side is right.\nThe Householder vectors as returned by \\ref rocsolver_sgelqf \"GELQF\"\nin the first k rows of its argument A.\n@param[in]\nlda         rocblas_int. lda >= k.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by \\ref rocsolver_sgelqf \"GELQF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_cunmlq(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1661,7 +1680,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORM2L multiplies a matrix Q with orthonormal columns by a general m-by-n\nmatrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is\nnever stored, it is calculated from the Householder vectors and scalars\nreturned by the QL factorization \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its transpose is to be\napplied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgeqlf \"GEQLF\" in the last k columns of its\nargument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, lda >= n if side is right.\\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by\n\\ref rocsolver_sgeqlf \"GEQLF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief ORM2L multiplies a matrix Q with orthonormal columns by a general m-by-n\nmatrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is\nnever stored, it is calculated from the Householder vectors and scalars\nreturned by the QL factorization \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its transpose is to be\napplied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\nThe Householder vectors as returned by \\ref rocsolver_sgeqlf \"GEQLF\" in the last k columns of its\nargument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, lda >= n if side is right.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by\n\\ref rocsolver_sgeqlf \"GEQLF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_sorm2l(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1694,7 +1713,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNM2L multiplies a complex matrix Q with orthonormal columns by a\ngeneral m-by-n matrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is\nnever stored, it is calculated from the Householder vectors and scalars\nreturned by the QL factorization \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its conjugate\ntranspose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgeqlf \"GEQLF\" in the last k columns of its\nargument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, lda >= n if side is right.\\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by\n\\ref rocsolver_sgeqlf \"GEQLF\".\n@param[inout]\nC           pointer to type. 
Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief UNM2L multiplies a complex matrix Q with orthonormal columns by a\ngeneral m-by-n matrix C.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is\nnever stored, it is calculated from the Householder vectors and scalars\nreturned by the QL factorization \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its conjugate\ntranspose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\nThe Householder vectors as returned by \\ref rocsolver_sgeqlf \"GEQLF\" in the last k columns of its\nargument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, lda >= n if side is right.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by\n\\ref rocsolver_sgeqlf \"GEQLF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_cunm2l(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1727,7 +1746,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORMQL multiplies a matrix Q with orthonormal columns by a general m-by-n\nmatrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is\nnever stored, it is calculated from the Householder vectors and scalars\nreturned by the QL factorization \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its transpose is to be\napplied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgeqlf \"GEQLF\" in the last k columns of its\nargument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, lda >= n if side is right.\\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by\n\\ref rocsolver_sgeqlf \"GEQLF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief ORMQL multiplies a matrix Q with orthonormal columns by a general m-by-n\nmatrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is\nnever stored, it is calculated from the Householder vectors and scalars\nreturned by the QL factorization \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its transpose is to be\napplied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\nThe Householder vectors as returned by \\ref rocsolver_sgeqlf \"GEQLF\" in the last k columns of its\nargument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, lda >= n if side is right.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by\n\\ref rocsolver_sgeqlf \"GEQLF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_sormql(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1760,7 +1779,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNMQL multiplies a complex matrix Q with orthonormal columns by a\ngeneral m-by-n matrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is\nnever stored, it is calculated from the Householder vectors and scalars\nreturned by the QL factorization \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its conjugate\ntranspose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\\n\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgeqlf \"GEQLF\" in the last k columns of its\nargument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, lda >= n if side is right.\\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\\n\nThe Householder scalars as returned by\n\\ref rocsolver_sgeqlf \"GEQLF\".\n@param[inout]\nC           pointer to type. 
Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief UNMQL multiplies a complex matrix Q with orthonormal columns by a\ngeneral m-by-n matrix C.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nQ is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1)\n\\f]\n\nof order m if applying from the left, or n if applying from the right. Q is\nnever stored, it is calculated from the Householder vectors and scalars\nreturned by the QL factorization \\ref rocsolver_sgeqlf \"GEQLF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its conjugate\ntranspose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. k >= 0; k <= m if side is left, k <= n if side is right.\nThe number of Householder reflectors that form Q.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*k.\nThe Householder vectors as returned by \\ref rocsolver_sgeqlf \"GEQLF\" in the last k columns of its\nargument A.\n@param[in]\nlda         rocblas_int. lda >= m if side is left, lda >= n if side is right.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least k.\nThe Householder scalars as returned by\n\\ref rocsolver_sgeqlf \"GEQLF\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. 
On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_cunmql(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1793,7 +1812,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORMBR multiplies a matrix Q with orthonormal rows or columns by a\ngeneral m-by-n matrix C.\n\n\\details\nIf storev is column-wise, then the matrix Q has orthonormal columns.\nIf storev is row-wise, then the matrix Q has orthonormal rows.\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nThe order q of the orthogonal matrix Q is q = m if applying from the left, or q = n if applying from the right.\n\nWhen storev is column-wise, if q >= k, then Q is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_1H_2\\cdots H_k,\n\\f]\n\nand if q < k, then Q is defined as the product\n\n\\f[\nQ = H_1H_2\\cdots H_{q-1}.\n\\f]\n\nWhen storev is row-wise, if q > k, then Q is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_1H_2\\cdots H_k,\n\\f]\n\nand if q <= k, Q is defined as the product\n\n\\f[\nQ = H_1H_2\\cdots H_{q-1}.\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors and scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\" in its arguments A and tauq or taup.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nstorev      #rocblas_storev.\\n\nSpecifies whether to work column-wise or row-wise.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. 
k >= 0.\\n\nThe number of columns (if storev is column-wise) or rows (if row-wise) of the\noriginal matrix reduced by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[in]\nA           pointer to type. Array on the GPU of size lda*min(q,k) if column-wise, or lda*q if row-wise.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[in]\nlda         rocblas_int. lda >= q if column-wise, or lda >= min(q,k) if row-wise. \\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least min(q,k).\\n\nThe Householder scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief ORMBR multiplies a matrix Q with orthonormal rows or columns by a\ngeneral m-by-n matrix C.\n\n\\details\nIf storev is column-wise, then the matrix Q has orthonormal columns.\nIf storev is row-wise, then the matrix Q has orthonormal rows.\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nThe order q of the orthogonal matrix Q is q = m if applying from the left, or q = n if applying from the right.\n\nWhen storev is column-wise, if q >= k, then Q is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(1)H(2)\\cdots H(k),\n\\f]\n\nand if q < k, then Q is defined as the product\n\n\\f[\nQ = H(1)H(2)\\cdots H(q-1).\n\\f]\n\nWhen storev is row-wise, if q > k, then Q is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(1)H(2)\\cdots H(k),\n\\f]\n\nand if q <= k, Q is defined as the product\n\n\\f[\nQ = H(1)H(2)\\cdots H(q-1).\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors and scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\" in its arguments A and tauq or taup.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nstorev      #rocblas_storev.\nSpecifies whether to work column-wise or row-wise.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. 
k >= 0.\nThe number of columns (if storev is column-wise) or rows (if row-wise) of the\noriginal matrix reduced by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[in]\nA           pointer to type. Array on the GPU of size lda*min(q,k) if column-wise, or lda*q if row-wise.\nThe Householder vectors as returned by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[in]\nlda         rocblas_int. lda >= q if column-wise, or lda >= min(q,k) if row-wise.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least min(q,k).\nThe Householder scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_sormbr(
         handle: rocblas_handle,
         storev: rocblas_storev,
@@ -1828,7 +1847,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNMBR multiplies a complex matrix Q with orthonormal rows or columns by\na general m-by-n matrix C.\n\n\\details\nIf storev is column-wise, then the matrix Q has orthonormal columns.\nIf storev is row-wise, then the matrix Q has orthonormal rows.\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nThe order q of the unitary matrix Q is q = m if applying from the left, or q = n if applying from the right.\n\nWhen storev is column-wise, if q >= k, then Q is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_1H_2\\cdots H_k,\n\\f]\n\nand if q < k, then Q is defined as the product\n\n\\f[\nQ = H_1H_2\\cdots H_{q-1}.\n\\f]\n\nWhen storev is row-wise, if q > k, then Q is defined as the product of k Householder reflectors\n\n\\f[\nQ = H_1H_2\\cdots H_k,\n\\f]\n\nand if q <= k, Q is defined as the product\n\n\\f[\nQ = H_1H_2\\cdots H_{q-1}.\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors and scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\" in its arguments A and tauq or taup.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nstorev      #rocblas_storev.\\n\nSpecifies whether to work column-wise or row-wise.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its conjugate transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. 
k >= 0.\\n\nThe number of columns (if storev is column-wise) or rows (if row-wise) of the\noriginal matrix reduced by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[in]\nA           pointer to type. Array on the GPU of size lda*min(q,k) if column-wise, or lda*q if row-wise.\\n\nThe Householder vectors as returned by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[in]\nlda         rocblas_int. lda >= q if column-wise, or lda >= min(q,k) if row-wise. \\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least min(q,k).\\n\nThe Householder scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief UNMBR multiplies a complex matrix Q with orthonormal rows or columns by\na general m-by-n matrix C.\n\n\\details\nIf storev is column-wise, then the matrix Q has orthonormal columns.\nIf storev is row-wise, then the matrix Q has orthonormal rows.\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nThe order q of the unitary matrix Q is q = m if applying from the left, or q = n if applying from the right.\n\nWhen storev is column-wise, if q >= k, then Q is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(1)H(2)\\cdots H(k),\n\\f]\n\nand if q < k, then Q is defined as the product\n\n\\f[\nQ = H(1)H(2)\\cdots H(q-1).\n\\f]\n\nWhen storev is row-wise, if q > k, then Q is defined as the product of k Householder reflectors\n\n\\f[\nQ = H(1)H(2)\\cdots H(k),\n\\f]\n\nand if q <= k, Q is defined as the product\n\n\\f[\nQ = H(1)H(2)\\cdots H(q-1).\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its corresponding\nHouseholder vectors and scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\" in its arguments A and tauq or taup.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nstorev      #rocblas_storev.\nSpecifies whether to work column-wise or row-wise.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its conjugate transpose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nk           rocblas_int. 
k >= 0.\nThe number of columns (if storev is column-wise) or rows (if row-wise) of the\noriginal matrix reduced by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[in]\nA           pointer to type. Array on the GPU of size lda*min(q,k) if column-wise, or lda*q if row-wise.\nThe Householder vectors as returned by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[in]\nlda         rocblas_int. lda >= q if column-wise, or lda >= min(q,k) if row-wise.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. Array on the GPU of dimension at least min(q,k).\nThe Householder scalars as returned by \\ref rocsolver_sgebrd \"GEBRD\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_cunmbr(
         handle: rocblas_handle,
         storev: rocblas_storev,
@@ -1863,7 +1882,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief ORMTR multiplies an orthogonal matrix Q by a general m-by-n matrix C.\n\n\\details\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nThe order q of the orthogonal matrix Q is q = m if applying from the left, or\nq = n if applying from the right.\n\nQ is defined as a product of q-1 Householder reflectors. If\nuplo indicates upper, then Q has the form\n\n\\f[\nQ = H_{q-1}H_{q-2}\\cdots H_1.\n\\f]\n\nOn the other hand, if uplo indicates lower, then Q has the form\n\n\\f[\nQ = H_1H_2\\cdots H_{q-1}\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its\ncorresponding Householder vectors and scalars as returned by\n\\ref rocsolver_ssytrd \"SYTRD\" in its arguments A and tau.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the \\ref rocsolver_ssytrd \"SYTRD\" factorization was upper or\nlower triangular. If uplo indicates lower (or upper), then the upper (or\nlower) part of A is not used.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its transpose is to be\napplied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*q.\\n\nOn entry, the Householder vectors as\nreturned by \\ref rocsolver_ssytrd \"SYTRD\".\n@param[in]\nlda         rocblas_int. lda >= q.\\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. 
Array on the GPU of dimension at least q-1.\\n\nThe Householder scalars as returned by\n\\ref rocsolver_ssytrd \"SYTRD\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief ORMTR multiplies an orthogonal matrix Q by a general m-by-n matrix C.\n\n\\details\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^TC & \\: \\text{Transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^T & \\: \\text{Transpose from the right.}\n\\end{array}\n\\f]\n\nThe order q of the orthogonal matrix Q is q = m if applying from the left, or\nq = n if applying from the right.\n\nQ is defined as a product of q-1 Householder reflectors. If\nuplo indicates upper, then Q has the form\n\n\\f[\nQ = H(q-1)H(q-2)\\cdots H(1).\n\\f]\n\nOn the other hand, if uplo indicates lower, then Q has the form\n\n\\f[\nQ = H(1)H(2)\\cdots H(q-1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its\ncorresponding Householder vectors and scalars as returned by\n\\ref rocsolver_ssytrd \"SYTRD\" in its arguments A and tau.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the \\ref rocsolver_ssytrd \"SYTRD\" factorization was upper or\nlower triangular. If uplo indicates lower (or upper), then the upper (or\nlower) part of A is not used.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its transpose is to be\napplied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*q.\nOn entry, the Householder vectors as\nreturned by \\ref rocsolver_ssytrd \"SYTRD\".\n@param[in]\nlda         rocblas_int. lda >= q.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. 
Array on the GPU of dimension at least q-1.\nThe Householder scalars as returned by\n\\ref rocsolver_ssytrd \"SYTRD\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_sormtr(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1896,7 +1915,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief UNMTR multiplies a unitary matrix Q by a general m-by-n matrix C.\n\n\\details\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nThe order q of the unitary matrix Q is q = m if applying from the left, or\nq = n if applying from the right.\n\nQ is defined as a product of q-1 Householder reflectors. If\nuplo indicates upper, then Q has the form\n\n\\f[\nQ = H_{q-1}H_{q-2}\\cdots H_1.\n\\f]\n\nOn the other hand, if uplo indicates lower, then Q has the form\n\n\\f[\nQ = H_1H_2\\cdots H_{q-1}\n\\f]\n\nThe Householder matrices \\f$H_i\\f$ are never stored, they are computed from its\ncorresponding Householder vectors and scalars as returned by\n\\ref rocsolver_chetrd \"HETRD\" in its arguments A and tau.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\\n\nSpecifies from which side to apply Q.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the \\ref rocsolver_chetrd \"HETRD\" factorization was upper or\nlower triangular. If uplo indicates lower (or upper), then the upper (or\nlower) part of A is not used.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies whether the matrix Q or its conjugate\ntranspose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of columns of matrix C.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*q.\\n\nOn entry, the Householder vectors as\nreturned by \\ref rocsolver_chetrd \"HETRD\".\n@param[in]\nlda         rocblas_int. lda >= q.\\n\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. 
Array on the GPU of dimension at least q-1.\\n\nThe Householder scalars as returned by\n\\ref rocsolver_chetrd \"HETRD\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\\n\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\\n\nLeading dimension of C."]
+    #[doc = " @{\n\\brief UNMTR multiplies a unitary matrix Q by a general m-by-n matrix C.\n\n\\details\nThe matrix Q is applied in one of the following forms, depending on\nthe values of side and trans:\n\n\\f[\n\\begin{array}{cl}\nQC & \\: \\text{No transpose from the left,}\\\\\nQ^HC & \\: \\text{Conjugate transpose from the left,}\\\\\nCQ & \\: \\text{No transpose from the right, and}\\\\\nCQ^H & \\: \\text{Conjugate transpose from the right.}\n\\end{array}\n\\f]\n\nThe order q of the unitary matrix Q is q = m if applying from the left, or\nq = n if applying from the right.\n\nQ is defined as a product of q-1 Householder reflectors. If\nuplo indicates upper, then Q has the form\n\n\\f[\nQ = H(q-1)H(q-2)\\cdots H(1).\n\\f]\n\nOn the other hand, if uplo indicates lower, then Q has the form\n\n\\f[\nQ = H(1)H(2)\\cdots H(q-1)\n\\f]\n\nThe Householder matrices \\f$H(i)\\f$ are never stored, they are computed from its\ncorresponding Householder vectors and scalars as returned by\n\\ref rocsolver_chetrd \"HETRD\" in its arguments A and tau.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nside        rocblas_side.\nSpecifies from which side to apply Q.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the \\ref rocsolver_chetrd \"HETRD\" factorization was upper or\nlower triangular. If uplo indicates lower (or upper), then the upper (or\nlower) part of A is not used.\n@param[in]\ntrans       rocblas_operation.\nSpecifies whether the matrix Q or its conjugate\ntranspose is to be applied.\n@param[in]\nm           rocblas_int. m >= 0.\nNumber of rows of matrix C.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of columns of matrix C.\n@param[in]\nA           pointer to type. Array on the GPU of size lda*q.\nOn entry, the Householder vectors as\nreturned by \\ref rocsolver_chetrd \"HETRD\".\n@param[in]\nlda         rocblas_int. lda >= q.\nLeading dimension of A.\n@param[in]\nipiv        pointer to type. 
Array on the GPU of dimension at least q-1.\nThe Householder scalars as returned by\n\\ref rocsolver_chetrd \"HETRD\".\n@param[inout]\nC           pointer to type. Array on the GPU of size ldc*n.\nOn entry, the matrix C. On exit, it is overwritten with\nQ*C, C*Q, Q'*C, or C*Q'.\n@param[in]\nldc         rocblas_int. ldc >= m.\nLeading dimension of C."]
     pub fn rocsolver_cunmtr(
         handle: rocblas_handle,
         side: rocblas_side,
@@ -1929,7 +1948,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief BDSQR computes the singular value decomposition (SVD) of an\nn-by-n bidiagonal matrix B, using the implicit QR algorithm.\n\n\\details\nThe SVD of B has the form:\n\n\\f[\nB = QSP'\n\\f]\n\nwhere S is the n-by-n diagonal matrix of singular values of B, the columns of Q are the left\nsingular vectors of B, and the columns of P are its right singular vectors.\n\nThe computation of the singular vectors is optional; this function accepts input matrices\nU (of size nu-by-n) and V (of size n-by-nv) that are overwritten with \\f$UQ\\f$ and \\f$P'V\\f$. If nu = 0\nno left vectors are computed; if nv = 0 no right vectors are computed.\n\nOptionally, this function can also compute \\f$Q'C\\f$ for a given n-by-nc input matrix C.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether B is upper or lower bidiagonal.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of matrix B.\n@param[in]\nnv          rocblas_int. nv >= 0.\\n\nThe number of columns of matrix V.\n@param[in]\nnu          rocblas_int. nu >= 0.\\n\nThe number of rows of matrix U.\n@param[in]\nnc          rocblas_int. nu >= 0.\\n\nThe number of columns of matrix C.\n@param[inout]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nOn entry, the diagonal elements of B. On exit, if info = 0,\nthe singular values of B in decreasing order; if info > 0,\nthe diagonal elements of a bidiagonal matrix\northogonally equivalent to B.\n@param[inout]\nE           pointer to real type. Array on the GPU of dimension n-1.\\n\nOn entry, the off-diagonal elements of B. On exit, if info > 0,\nthe off-diagonal elements of a bidiagonal matrix\northogonally equivalent to B (if info = 0 this matrix converges to zero).\n@param[inout]\nV           pointer to type. Array on the GPU of dimension ldv*nv.\\n\nOn entry, the matrix V. 
On exit, it is overwritten with P'*V.\n(Not referenced if nv = 0).\n@param[in]\nldv         rocblas_int. ldv >= n if nv > 0, or ldv >=1 if nv = 0.\\n\nThe leading dimension of V.\n@param[inout]\nU           pointer to type. Array on the GPU of dimension ldu*n.\\n\nOn entry, the matrix U. On exit, it is overwritten with U*Q.\n(Not referenced if nu = 0).\n@param[in]\nldu         rocblas_int. ldu >= nu.\\n\nThe leading dimension of U.\n@param[inout]\nC           pointer to type. Array on the GPU of dimension ldc*nc.\\n\nOn entry, the matrix C. On exit, it is overwritten with Q'*C.\n(Not referenced if nc = 0).\n@param[in]\nldc         rocblas_int. ldc >= n if nc > 0, or ldc >=1 if nc = 0.\\n\nThe leading dimension of C.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, i elements of E have not converged to zero."]
+    #[doc = " @{\n\\brief BDSQR computes the singular value decomposition (SVD) of an\nn-by-n bidiagonal matrix B, using the implicit QR algorithm.\n\n\\details\nThe SVD of B has the form:\n\n\\f[\nB = QSP'\n\\f]\n\nwhere S is the n-by-n diagonal matrix of singular values of B, the columns of Q are the left\nsingular vectors of B, and the columns of P are its right singular vectors.\n\nThe computation of the singular vectors is optional; this function accepts input matrices\nU (of size nu-by-n) and V (of size n-by-nv) that are overwritten with \\f$UQ\\f$ and \\f$P'V\\f$. If nu = 0\nno left vectors are computed; if nv = 0 no right vectors are computed.\n\nOptionally, this function can also compute \\f$Q'C\\f$ for a given n-by-nc input matrix C.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether B is upper or lower bidiagonal.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of matrix B.\n@param[in]\nnv          rocblas_int. nv >= 0.\nThe number of columns of matrix V.\n@param[in]\nnu          rocblas_int. nu >= 0.\nThe number of rows of matrix U.\n@param[in]\nnc          rocblas_int. nc >= 0.\nThe number of columns of matrix C.\n@param[inout]\nD           pointer to real type. Array on the GPU of dimension n.\nOn entry, the diagonal elements of B. On exit, if info = 0,\nthe singular values of B in decreasing order; if info > 0,\nthe diagonal elements of a bidiagonal matrix\northogonally equivalent to B.\n@param[inout]\nE           pointer to real type. Array on the GPU of dimension n-1.\nOn entry, the off-diagonal elements of B. On exit, if info > 0,\nthe off-diagonal elements of a bidiagonal matrix\northogonally equivalent to B (if info = 0 this matrix converges to zero).\n@param[inout]\nV           pointer to type. Array on the GPU of dimension ldv*nv.\nOn entry, the matrix V. On exit, it is overwritten with P'*V.\n(Not referenced if nv = 0).\n@param[in]\nldv         rocblas_int. 
ldv >= n if nv > 0, or ldv >=1 if nv = 0.\nThe leading dimension of V.\n@param[inout]\nU           pointer to type. Array on the GPU of dimension ldu*n.\nOn entry, the matrix U. On exit, it is overwritten with U*Q.\n(Not referenced if nu = 0).\n@param[in]\nldu         rocblas_int. ldu >= nu.\nThe leading dimension of U.\n@param[inout]\nC           pointer to type. Array on the GPU of dimension ldc*nc.\nOn entry, the matrix C. On exit, it is overwritten with Q'*C.\n(Not referenced if nc = 0).\n@param[in]\nldc         rocblas_int. ldc >= n if nc > 0, or ldc >=1 if nc = 0.\nThe leading dimension of C.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, i elements of E have not converged to zero."]
     pub fn rocsolver_sbdsqr(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -2010,7 +2029,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief STERF computes the eigenvalues of a symmetric tridiagonal matrix.\n\n\\details\nThe eigenvalues of the symmetric tridiagonal matrix are computed by the\nPal-Walker-Kahan variant of the QL/QR algorithm, and returned in\nincreasing order.\n\nThe matrix is not represented explicitly, but rather as the array of\ndiagonal elements D and the array of symmetric off-diagonal elements E.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the tridiagonal matrix.\n@param[inout]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nOn entry, the diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, the eigenvalues in increasing order.\nIf info > 0, the diagonal elements of a tridiagonal matrix\nthat is similar to the original matrix (i.e. has the same\neigenvalues).\n@param[inout]\nE           pointer to real type. Array on the GPU of dimension n-1.\\n\nOn entry, the off-diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, this array converges to zero.\nIf info > 0, the off-diagonal elements of a tridiagonal matrix\nthat is similar to the original matrix (i.e. has the same\neigenvalues).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, STERF did not converge. i elements of E did not\nconverge to zero."]
+    #[doc = " @{\n\\brief STERF computes the eigenvalues of a symmetric tridiagonal matrix.\n\n\\details\nThe eigenvalues of the symmetric tridiagonal matrix are computed by the\nPal-Walker-Kahan variant of the QL/QR algorithm, and returned in\nincreasing order.\n\nThe matrix is not represented explicitly, but rather as the array of\ndiagonal elements D and the array of symmetric off-diagonal elements E.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the tridiagonal matrix.\n@param[inout]\nD           pointer to real type. Array on the GPU of dimension n.\nOn entry, the diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, the eigenvalues in increasing order.\nIf info > 0, the diagonal elements of a tridiagonal matrix\nthat is similar to the original matrix (i.e. has the same\neigenvalues).\n@param[inout]\nE           pointer to real type. Array on the GPU of dimension n-1.\nOn entry, the off-diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, this array converges to zero.\nIf info > 0, the off-diagonal elements of a tridiagonal matrix\nthat is similar to the original matrix (i.e. has the same\neigenvalues).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, STERF did not converge. i elements of E did not\nconverge to zero."]
     pub fn rocsolver_ssterf(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -2031,7 +2050,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief STEQR computes the eigenvalues and (optionally) eigenvectors of\na symmetric tridiagonal matrix.\n\n\\details\nThe eigenvalues of the symmetric tridiagonal matrix are computed by the\nimplicit QL/QR algorithm, and returned in increasing order.\n\nThe matrix is not represented explicitly, but rather as the array of\ndiagonal elements D and the array of symmetric off-diagonal elements E.\nWhen D and E correspond to the tridiagonal form of a full symmetric/Hermitian matrix, as returned by, e.g.,\n\\ref rocsolver_ssytrd \"SYTRD\" or \\ref rocsolver_chetrd \"HETRD\", the eigenvectors of the original matrix can also\nbe computed, depending on the value of evect.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies how the eigenvectors are computed.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the tridiagonal matrix.\n@param[inout]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nOn entry, the diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, the eigenvalues in increasing order.\nIf info > 0, the diagonal elements of a tridiagonal matrix\nthat is similar to the original matrix (i.e. has the same\neigenvalues).\n@param[inout]\nE           pointer to real type. Array on the GPU of dimension n-1.\\n\nOn entry, the off-diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, this array converges to zero.\nIf info > 0, the off-diagonal elements of a tridiagonal matrix\nthat is similar to the original matrix (i.e. has the same\neigenvalues).\n@param[inout]\nC           pointer to type. 
Array on the GPU of dimension ldc*n.\\n\nOn entry, if evect is original, the orthogonal/unitary matrix\nused for the reduction to tridiagonal form as returned by, e.g.,\n\\ref rocsolver_sorgtr \"ORGTR\" or \\ref rocsolver_cungtr \"UNGTR\".\nOn exit, it is overwritten with the eigenvectors of the original\nsymmetric/Hermitian matrix (if evect is original), or the\neigenvectors of the tridiagonal matrix (if evect is tridiagonal).\n(Not referenced if evect is none).\n@param[in]\nldc         rocblas_int. ldc >= n if evect is original or tridiagonal.\\n\nSpecifies the leading dimension of C.\n(Not referenced if evect is none).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, STEQR did not converge. i elements of E did not\nconverge to zero."]
+    #[doc = " @{\n\\brief STEQR computes the eigenvalues and (optionally) eigenvectors of\na symmetric tridiagonal matrix.\n\n\\details\nThe eigenvalues of the symmetric tridiagonal matrix are computed by the\nimplicit QL/QR algorithm, and returned in increasing order.\n\nThe matrix is not represented explicitly, but rather as the array of\ndiagonal elements D and the array of symmetric off-diagonal elements E.\nWhen D and E correspond to the tridiagonal form of a full symmetric/Hermitian matrix, as returned by, e.g.,\n\\ref rocsolver_ssytrd \"SYTRD\" or \\ref rocsolver_chetrd \"HETRD\", the eigenvectors of the original matrix can also\nbe computed, depending on the value of evect.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies how the eigenvectors are computed.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the tridiagonal matrix.\n@param[inout]\nD           pointer to real type. Array on the GPU of dimension n.\nOn entry, the diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, the eigenvalues in increasing order.\nIf info > 0, the diagonal elements of a tridiagonal matrix\nthat is similar to the original matrix (i.e. has the same\neigenvalues).\n@param[inout]\nE           pointer to real type. Array on the GPU of dimension n-1.\nOn entry, the off-diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, this array converges to zero.\nIf info > 0, the off-diagonal elements of a tridiagonal matrix\nthat is similar to the original matrix (i.e. has the same\neigenvalues).\n@param[inout]\nC           pointer to type. 
Array on the GPU of dimension ldc*n.\nOn entry, if evect is original, the orthogonal/unitary matrix\nused for the reduction to tridiagonal form as returned by, e.g.,\n\\ref rocsolver_sorgtr \"ORGTR\" or \\ref rocsolver_cungtr \"UNGTR\".\nOn exit, it is overwritten with the eigenvectors of the original\nsymmetric/Hermitian matrix (if evect is original), or the\neigenvectors of the tridiagonal matrix (if evect is tridiagonal).\n(Not referenced if evect is none).\n@param[in]\nldc         rocblas_int. ldc >= n if evect is original or tridiagonal.\nSpecifies the leading dimension of C.\n(Not referenced if evect is none).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, STEQR did not converge. i elements of E did not\nconverge to zero."]
     pub fn rocsolver_ssteqr(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -2084,7 +2103,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief STEDC computes the eigenvalues and (optionally) eigenvectors of\na symmetric tridiagonal matrix.\n\n\\details\nThis function uses the divide and conquer method to compute the eigenvectors.\nThe eigenvalues are returned in increasing order.\n\nThe matrix is not represented explicitly, but rather as the array of\ndiagonal elements D and the array of symmetric off-diagonal elements E.\nWhen D and E correspond to the tridiagonal form of a full symmetric/Hermitian matrix, as returned by, e.g.,\n\\ref rocsolver_ssytrd \"SYTRD\" or \\ref rocsolver_chetrd \"HETRD\", the eigenvectors of the original matrix can also\nbe computed, depending on the value of evect.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies how the eigenvectors are computed.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the tridiagonal matrix.\n@param[inout]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nOn entry, the diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, the eigenvalues in increasing order.\n@param[inout]\nE           pointer to real type. Array on the GPU of dimension n-1.\\n\nOn entry, the off-diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, the values of this array are destroyed.\n@param[inout]\nC           pointer to type. Array on the GPU of dimension ldc*n.\\n\nOn entry, if evect is original, the orthogonal/unitary matrix\nused for the reduction to tridiagonal form as returned by, e.g.,\n\\ref rocsolver_sorgtr \"ORGTR\" or \\ref rocsolver_cungtr \"UNGTR\".\nOn exit, if info = 0, it is overwritten with the eigenvectors of the original\nsymmetric/Hermitian matrix (if evect is original), or the\neigenvectors of the tridiagonal matrix (if evect is tridiagonal).\n(Not referenced if evect is none).\n@param[in]\nldc         rocblas_int. 
ldc >= n if evect is original or tridiagonal.\\n\nSpecifies the leading dimension of C. (Not referenced if evect is none).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, STEDC failed to compute an eigenvalue on the sub-matrix formed by\nthe rows and columns info/(n+1) through mod(info,n+1)."]
+    #[doc = " @{\n\\brief STEDC computes the eigenvalues and (optionally) eigenvectors of\na symmetric tridiagonal matrix.\n\n\\details\nThis function uses the divide and conquer method to compute the eigenvectors.\nThe eigenvalues are returned in increasing order.\n\nThe matrix is not represented explicitly, but rather as the array of\ndiagonal elements D and the array of symmetric off-diagonal elements E.\nWhen D and E correspond to the tridiagonal form of a full symmetric/Hermitian matrix, as returned by, e.g.,\n\\ref rocsolver_ssytrd \"SYTRD\" or \\ref rocsolver_chetrd \"HETRD\", the eigenvectors of the original matrix can also\nbe computed, depending on the value of evect.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies how the eigenvectors are computed.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the tridiagonal matrix.\n@param[inout]\nD           pointer to real type. Array on the GPU of dimension n.\nOn entry, the diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, the eigenvalues in increasing order.\n@param[inout]\nE           pointer to real type. Array on the GPU of dimension n-1.\nOn entry, the off-diagonal elements of the tridiagonal matrix.\nOn exit, if info = 0, the values of this array are destroyed.\n@param[inout]\nC           pointer to type. Array on the GPU of dimension ldc*n.\nOn entry, if evect is original, the orthogonal/unitary matrix\nused for the reduction to tridiagonal form as returned by, e.g.,\n\\ref rocsolver_sorgtr \"ORGTR\" or \\ref rocsolver_cungtr \"UNGTR\".\nOn exit, if info = 0, it is overwritten with the eigenvectors of the original\nsymmetric/Hermitian matrix (if evect is original), or the\neigenvectors of the tridiagonal matrix (if evect is tridiagonal).\n(Not referenced if evect is none).\n@param[in]\nldc         rocblas_int. ldc >= n if evect is original or tridiagonal.\nSpecifies the leading dimension of C. 
(Not referenced if evect is none).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, STEDC failed to compute an eigenvalue on the sub-matrix formed by\nthe rows and columns info/(n+1) through mod(info,n+1)."]
     pub fn rocsolver_sstedc(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -2137,7 +2156,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief STEBZ computes a set of eigenvalues of a symmetric tridiagonal matrix T.\n\n\\details\nThis function computes all the eigenvalues of T, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange.\n\nThe eigenvalues are returned in increasing order either for the entire matrix, or grouped by independent\ndiagonal blocks (if they exist), depending on the value of eorder.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\neorder      #rocblas_eorder.\\n\nSpecifies whether the computed eigenvalues will be ordered by their position in the\nentire spectrum, or grouped by independent diagonal (split off) blocks.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the tridiagonal matrix T.\n@param[in]\nvl          real type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if erange indicates to look\nfor all the eigenvalues of T or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if erange indicates to look\nfor all the eigenvalues of T or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. Ignored if erange indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the largest eigenvalue to be computed. Ignored if erange indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. 
An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[in]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nThe diagonal elements of the tridiagonal matrix.\n@param[in]\nE           pointer to real type. Array on the GPU of dimension n-1.\\n\nThe off-diagonal elements of the tridiagonal matrix.\n@param[out]\nnev         pointer to a rocblas_int on the GPU. \\n\nThe total number of eigenvalues found.\n@param[out]\nnsplit      pointer to a rocblas_int on the GPU.\\n\nThe number of split off blocks in the matrix.\n@param[out]\nW           pointer to real type. Array on the GPU of dimension n.\\n\nThe first nev elements contain the computed eigenvalues. (The remaining elements\nmay be used as workspace for internal computations).\n@param[out]\niblock      pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe block indices corresponding to each eigenvalue. When matrix T has\nsplit off blocks (nsplit > 1), then if iblock[i] = k, the\neigenvalue W[i] belongs to the k-th diagonal block from the top.\n@param[out]\nisplit      pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe splitting indices that divide the tridiagonal matrix into\ndiagonal blocks. The k-th block stretches from the end of the (k-1)-th\nblock (or the top left corner of the tridiagonal matrix,\nin the case of the 1st block) to the isplit[k]-th row/column.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = 1, the bisection did not converge for some eigenvalues, i.e. the returned\nvalues are not as accurate as the given tolerance. The non-converged eigenvalues\nare flagged by negative entries in iblock.\n"]
+    #[doc = " @{\n\\brief STEBZ computes a set of eigenvalues of a symmetric tridiagonal matrix T.\n\n\\details\nThis function computes all the eigenvalues of T, all the eigenvalues in the half-open interval (vl, vu],\nor the il-th through iu-th eigenvalues, depending on the value of erange.\n\nThe eigenvalues are returned in increasing order either for the entire matrix, or grouped by independent\ndiagonal blocks (if they exist), depending on the value of eorder.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\neorder      #rocblas_eorder.\nSpecifies whether the computed eigenvalues will be ordered by their position in the\nentire spectrum, or grouped by independent diagonal (split off) blocks.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the tridiagonal matrix T.\n@param[in]\nvl          real type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if erange indicates to look\nfor all the eigenvalues of T or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if erange indicates to look\nfor all the eigenvalues of T or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. Ignored if erange indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest eigenvalue to be computed. Ignored if erange indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. 
If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[in]\nD           pointer to real type. Array on the GPU of dimension n.\nThe diagonal elements of the tridiagonal matrix.\n@param[in]\nE           pointer to real type. Array on the GPU of dimension n-1.\nThe off-diagonal elements of the tridiagonal matrix.\n@param[out]\nnev         pointer to a rocblas_int on the GPU.\nThe total number of eigenvalues found.\n@param[out]\nnsplit      pointer to a rocblas_int on the GPU.\nThe number of split off blocks in the matrix.\n@param[out]\nW           pointer to real type. Array on the GPU of dimension n.\nThe first nev elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[out]\niblock      pointer to rocblas_int. Array on the GPU of dimension n.\nThe block indices corresponding to each eigenvalue. When matrix T has\nsplit off blocks (nsplit > 1), then if iblock[i] = k, the\neigenvalue W[i] belongs to the k-th diagonal block from the top.\n@param[out]\nisplit      pointer to rocblas_int. Array on the GPU of dimension n.\nThe splitting indices that divide the tridiagonal matrix into\ndiagonal blocks. The k-th block stretches from the end of the (k-1)-th\nblock (or the top left corner of the tridiagonal matrix,\nin the case of the 1st block) to the isplit[k]-th row/column.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = 1, the bisection did not converge for some eigenvalues, i.e. the returned\nvalues are not as accurate as the given tolerance. The non-converged eigenvalues\nare flagged by negative entries in iblock.\n"]
     pub fn rocsolver_sstebz(
         handle: rocblas_handle,
         erange: rocblas_erange,
@@ -2182,7 +2201,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief STEIN computes the eigenvectors associated with a set of\nprovided eigenvalues of a symmetric tridiagonal matrix.\n\n\\details\nThe eigenvectors of the symmetric tridiagonal matrix are computed using\ninverse iteration.\n\nThe matrix is not represented explicitly, but rather as the array of\ndiagonal elements D and the array of symmetric off-diagonal elements E.\nThe eigenvalues must be provided in the array W, as returned by \\ref rocsolver_sstebz \"STEBZ\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the tridiagonal matrix.\n@param[in]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nThe diagonal elements of the tridiagonal matrix.\n@param[in]\nE           pointer to real type. Array on the GPU of dimension n-1.\\n\nThe off-diagonal elements of the tridiagonal matrix.\n@param[in]\nnev         pointer to a rocblas_int on the GPU. 0 <= nev <= n.\\n\nThe number of provided eigenvalues, and the number of eigenvectors\nto be computed.\n@param[in]\nW           pointer to real type. Array on the GPU of dimension >= nev.\\n\nA subset of nev eigenvalues of the tridiagonal matrix, as returned\nby \\ref rocsolver_sstebz \"STEBZ\".\n@param[in]\niblock      pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe block indices corresponding to each eigenvalue, as\nreturned by \\ref rocsolver_sstebz \"STEBZ\". If iblock[i] = k,\nthen eigenvalue W[i] belongs to the k-th block from the top.\n@param[in]\nisplit      pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe splitting indices that divide the tridiagonal matrix into\ndiagonal blocks, as returned by \\ref rocsolver_sstebz \"STEBZ\".\nThe k-th block stretches from the end of the (k-1)-th\nblock (or the top left corner of the tridiagonal matrix,\nin the case of the 1st block) to the isplit[k]-th row/column.\n@param[out]\nZ           pointer to type. 
Array on the GPU of dimension ldz*nev.\\n\nOn exit, contains the eigenvectors of the tridiagonal matrix\ncorresponding to the provided eigenvalues, stored by columns.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of Z.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU of dimension n.\\n\nIf info = 0, the first nev elements of ifail are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, i eigenvectors did not converge; their indices are stored in\nifail.\n"]
+    #[doc = " @{\n\\brief STEIN computes the eigenvectors associated with a set of\nprovided eigenvalues of a symmetric tridiagonal matrix.\n\n\\details\nThe eigenvectors of the symmetric tridiagonal matrix are computed using\ninverse iteration.\n\nThe matrix is not represented explicitly, but rather as the array of\ndiagonal elements D and the array of symmetric off-diagonal elements E.\nThe eigenvalues must be provided in the array W, as returned by \\ref rocsolver_sstebz \"STEBZ\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the tridiagonal matrix.\n@param[in]\nD           pointer to real type. Array on the GPU of dimension n.\nThe diagonal elements of the tridiagonal matrix.\n@param[in]\nE           pointer to real type. Array on the GPU of dimension n-1.\nThe off-diagonal elements of the tridiagonal matrix.\n@param[in]\nnev         pointer to a rocblas_int on the GPU. 0 <= nev <= n.\nThe number of provided eigenvalues, and the number of eigenvectors\nto be computed.\n@param[in]\nW           pointer to real type. Array on the GPU of dimension >= nev.\nA subset of nev eigenvalues of the tridiagonal matrix, as returned\nby \\ref rocsolver_sstebz \"STEBZ\".\n@param[in]\niblock      pointer to rocblas_int. Array on the GPU of dimension n.\nThe block indices corresponding to each eigenvalue, as\nreturned by \\ref rocsolver_sstebz \"STEBZ\". If iblock[i] = k,\nthen eigenvalue W[i] belongs to the k-th block from the top.\n@param[in]\nisplit      pointer to rocblas_int. Array on the GPU of dimension n.\nThe splitting indices that divide the tridiagonal matrix into\ndiagonal blocks, as returned by \\ref rocsolver_sstebz \"STEBZ\".\nThe k-th block stretches from the end of the (k-1)-th\nblock (or the top left corner of the tridiagonal matrix,\nin the case of the 1st block) to the isplit[k]-th row/column.\n@param[out]\nZ           pointer to type. 
Array on the GPU of dimension ldz*nev.\nOn exit, contains the eigenvectors of the tridiagonal matrix\ncorresponding to the provided eigenvalues, stored by columns.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of Z.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU of dimension n.\nIf info = 0, the first nev elements of ifail are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, i eigenvectors did not converge; their indices are stored in\nIFAIL.\n"]
     pub fn rocsolver_sstein(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -2251,7 +2270,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief BDSVDX computes a set of singular values of a bidiagonal matrix B.\n\n\\details\nThis function computes all the singular values of B, all the singular values in the half-open interval\n\\f$[vl, vu)\\f$, or the il-th through iu-th singular values, depending on the value of srange.\n\nDepending on the value of svect, the corresponding singular vectors will be computed and stored as blocks\nin the output matrix Z. That is,\n\n\\f[\nZ = \\left[\\begin{array}{c}\nU\\\\\nV\n\\end{array}\\right]\n\\f]\n\nwhere U contains the corresponding left singular vectors of B, and V contains the corresponding right\nsingular vectors.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether B is upper or lower bidiagonal.\n@param[in]\nsvect       #rocblas_svect.\\n\nSpecifies how the singular vectors are computed. Only rocblas_svect_none and\nrocblas_svect_singular are accepted.\n@param[in]\nsrange      #rocblas_srange.\\n\nSpecifies the type of range or interval of the singular values to be computed.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the bidiagonal matrix B.\n@param[in]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nThe diagonal elements of the bidiagonal matrix.\n@param[in]\nE           pointer to real type. Array on the GPU of dimension n-1.\\n\nThe off-diagonal elements of the bidiagonal matrix.\n@param[in]\nvl          real type. 0 <= vl < vu.\\n\nThe lower bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of B or the singular values within a set of indices.\n@param[in]\nvu          real type. 0 <= vl < vu.\\n\nThe upper bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of B or the singular values within a set of indices.\n@param[in]\nil          rocblas_int. 
il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the largest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of B or the singular values in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of B or the singular values in a half-open interval.\n@param[out]\nnsv         pointer to a rocblas_int on the GPU. \\n\nThe total number of singular values found. If srange is rocblas_srange_all, nsv = n.\nIf srange is rocblas_srange_index, nsv = iu - il + 1. Otherwise, 0 <= nsv <= n.\n@param[out]\nS           pointer to real type. Array on the GPU of dimension n.\\n\nThe first nsv elements contain the computed singular values in descending order.\n@param[out]\nZ           pointer to real type. Array on the GPU of dimension ldz*nsv.\\n\nIf info = 0, the first nsv columns contain the computed singular vectors corresponding to the\nsingular values in S. The first n rows of Z contain the matrix U, and the next n rows contain\nthe matrix V. Not referenced if svect is rocblas_svect_none.\nNote: If srange is rocblas_srange_value, then the values of nsv are not known in advance.\nThe user should ensure that Z is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= 2*n if svect is rocblas_svect_singular; ldz >= 1 otherwise.\\n\nSpecifies the leading dimension of Z.\n@param[out]\nifail       pointer to rocblas_int. 
Array on the GPU of dimension n.\\n\nIf info = 0, the first nsv elements of ifail are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge, as returned by \\ref rocsolver_sstein \"STEIN\".\nNot referenced if svect is rocblas_svect_none.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, i eigenvectors did not converge in \\ref rocsolver_sstein \"STEIN\"; their\nindices are stored in ifail.\n"]
+    #[doc = " @{\n\\brief BDSVDX computes a set of singular values of a bidiagonal matrix B.\n\n\\details\nThis function computes all the singular values of B, all the singular values in the half-open interval\n\\f$[vl, vu)\\f$, or the il-th through iu-th singular values, depending on the value of srange.\n\nDepending on the value of svect, the corresponding singular vectors will be computed and stored as blocks\nin the output matrix Z. That is,\n\n\\f[\nZ = \\left[\\begin{array}{c}\nU\\\\\nV\n\\end{array}\\right]\n\\f]\n\nwhere U contains the corresponding left singular vectors of B, and V contains the corresponding right\nsingular vectors.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether B is upper or lower bidiagonal.\n@param[in]\nsvect       #rocblas_svect.\nSpecifies how the singular vectors are computed. Only rocblas_svect_none and\nrocblas_svect_singular are accepted.\n@param[in]\nsrange      #rocblas_srange.\nSpecifies the type of range or interval of the singular values to be computed.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the bidiagonal matrix B.\n@param[in]\nD           pointer to real type. Array on the GPU of dimension n.\nThe diagonal elements of the bidiagonal matrix.\n@param[in]\nE           pointer to real type. Array on the GPU of dimension n-1.\nThe off-diagonal elements of the bidiagonal matrix.\n@param[in]\nvl          real type. 0 <= vl < vu.\nThe lower bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of B or the singular values within a set of indices.\n@param[in]\nvu          real type. 0 <= vl < vu.\nThe upper bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of B or the singular values within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest singular value to be computed. 
Ignored if srange indicates to look\nfor all the singular values of B or the singular values in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of B or the singular values in a half-open interval.\n@param[out]\nnsv         pointer to a rocblas_int on the GPU.\nThe total number of singular values found. If srange is rocblas_srange_all, nsv = n.\nIf srange is rocblas_srange_index, nsv = iu - il + 1. Otherwise, 0 <= nsv <= n.\n@param[out]\nS           pointer to real type. Array on the GPU of dimension nsv.\nThe first nsv elements contain the computed singular values in descending order.\nNote: If srange is rocblas_srange_value, then the value of nsv is not known in advance.\nIn this case, the user should ensure that S is large enough to hold n values.\n@param[out]\nZ           pointer to real type. Array on the GPU of dimension ldz*nsv.\nIf info = 0, the first nsv columns contain the computed singular vectors corresponding to the\nsingular values in S. The first n rows of Z contain the matrix U, and the next n rows contain\nthe matrix V. Not referenced if svect is rocblas_svect_none.\nNote: If srange is rocblas_srange_value, then the value of nsv is not known in advance.\nIn this case, the user should ensure that Z is large enough to hold n columns.\n@param[in]\nldz         rocblas_int. ldz >= 2*n if svect is rocblas_svect_singular; ldz >= 1 otherwise.\nSpecifies the leading dimension of Z.\n@param[out]\nifail       pointer to rocblas_int. 
Array on the GPU of dimension n.\nIf info = 0, the first nsv elements of ifail are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge, as returned by \\ref rocsolver_sstein \"STEIN\".\nNot referenced if svect is rocblas_svect_none.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, i eigenvectors did not converge in \\ref rocsolver_sstein \"STEIN\"; their\nindices are stored in ifail.\n"]
     pub fn rocsolver_sbdsvdx(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -2296,7 +2315,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETF2_NPVT computes the LU factorization of a general m-by-n matrix A\nwithout partial pivoting.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization has the form\n\n\\f[\nA = LU\n\\f]\n\nwhere L is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and U is upper\ntriangular (upper trapezoidal if m < n).\n\nNote: Although this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK-like API \\ref rocsolver_sgetf2 \"GETF2\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix A to be factored.\nOn exit, the factors L and U from the factorization.\nThe unit diagonal elements of L are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete."]
+    #[doc = " @{\n\\brief GETF2_NPVT computes the LU factorization of a general m-by-n matrix A\nwithout partial pivoting.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization has the form\n\n\\f[\nA = LU\n\\f]\n\nwhere L is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and U is upper\ntriangular (upper trapezoidal if m < n).\n\n\\note\nAlthough this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK API \\ref rocsolver_sgetf2 \"GETF2\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix A to be factored.\nOn exit, the factors L and U from the factorization.\nThe unit diagonal elements of L are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete."]
     pub fn rocsolver_sgetf2_npvt(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2341,7 +2360,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETF2_NPVT_BATCHED computes the LU factorization of a batch of\ngeneral m-by-n matrices without partial pivoting.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = L_jU_j\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_j\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\nNote: Although this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK-like API \\ref rocsolver_sgetf2_batched \"GETF2_BATCHED\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the factors L_j and U_j from the factorizations.\nThe unit diagonal elements of L_j are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete.\n@param[in]\nbatch_count rocblas_int. 
batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETF2_NPVT_BATCHED computes the LU factorization of a batch of\ngeneral m-by-n matrices without partial pivoting.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = L_lU_l\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_l\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\n\\note\nAlthough this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK API \\ref rocsolver_sgetf2_batched \"GETF2_BATCHED\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[in,out]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the factors L_l and U_l from the factorizations.\nThe unit diagonal elements of L_l are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete.\n@param[in]\nbatch_count rocblas_int. 
batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetf2_npvt_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2390,7 +2409,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETF2_NPVT_STRIDED_BATCHED computes the LU factorization of a batch\nof general m-by-n matrices without partial pivoting.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = L_jU_j\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_j\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\nNote: Although this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK-like API \\ref rocsolver_sgetf2_strided_batched \"GETF2_STRIDED_BATCHED\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the factors L_j and U_j from the factorization.\nThe unit diagonal elements of L_j are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETF2_NPVT_STRIDED_BATCHED computes the LU factorization of a batch\nof general m-by-n matrices without partial pivoting.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = L_lU_l\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_l\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\n\\note\nAlthough this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK-like API \\ref rocsolver_sgetf2_strided_batched \"GETF2_STRIDED_BATCHED\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the factors L_l and U_l from the factorization.\nThe unit diagonal elements of L_l are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetf2_npvt_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2443,7 +2462,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRF_NPVT computes the LU factorization of a general m-by-n matrix A\nwithout partial pivoting.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization has the form\n\n\\f[\nA = LU\n\\f]\n\nwhere L is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and U is upper\ntriangular (upper trapezoidal if m < n).\n\nNote: Although this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK-like API \\ref rocsolver_sgetrf \"GETRF\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix A to be factored.\nOn exit, the factors L and U from the factorization.\nThe unit diagonal elements of L are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete."]
+    #[doc = " @{\n\\brief GETRF_NPVT computes the LU factorization of a general m-by-n matrix A\nwithout partial pivoting.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization has the form\n\n\\f[\nA = LU\n\\f]\n\nwhere L is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and U is upper\ntriangular (upper trapezoidal if m < n).\n\nNote: Although this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK-like API \\ref rocsolver_sgetrf \"GETRF\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix A to be factored.\nOn exit, the factors L and U from the factorization.\nThe unit diagonal elements of L are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete."]
     pub fn rocsolver_sgetrf_npvt(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2488,7 +2507,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRF_NPVT_BATCHED computes the LU factorization of a batch of\ngeneral m-by-n matrices without partial pivoting.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = L_jU_j\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_j\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\nNote: Although this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK-like API \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the factors L_j and U_j from the factorizations.\nThe unit diagonal elements of L_j are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete.\n@param[in]\nbatch_count rocblas_int. 
batch_count >= 0.\\n\nNumber of matrices in the batch.\n"]
+    #[doc = " @{\n\\brief GETRF_NPVT_BATCHED computes the LU factorization of a batch of\ngeneral m-by-n matrices without partial pivoting.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = L_lU_l\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_l\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\nNote: Although this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK-like API \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the factors L_l and U_l from the factorizations.\nThe unit diagonal elements of L_l are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch.\n"]
     pub fn rocsolver_sgetrf_npvt_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2537,7 +2556,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRF_NPVT_STRIDED_BATCHED computes the LU factorization of a batch\nof general m-by-n matrices without partial pivoting.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = L_jU_j\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_j\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\nNote: Although this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK-like API \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the factors L_j and U_j from the factorization.\nThe unit diagonal elements of L_j are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch.\n"]
+    #[doc = " @{\n\\brief GETRF_NPVT_STRIDED_BATCHED computes the LU factorization of a batch\nof general m-by-n matrices without partial pivoting.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = L_lU_l\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_l\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\nNote: Although this routine can offer better performance, Gaussian elimination without pivoting is not backward stable.\nIf numerical accuracy is compromised, use the legacy-LAPACK-like API \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\" routines instead.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the factors L_l and U_l from the factorization.\nThe unit diagonal elements of L_l are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero element in the diagonal. The factorization from\nthis point might be incomplete.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch.\n"]
     pub fn rocsolver_sgetrf_npvt_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2590,7 +2609,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETF2 computes the LU factorization of a general m-by-n matrix A\nusing partial pivoting with row interchanges.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization has the form\n\n\\f[\nA = PLU\n\\f]\n\nwhere P is a permutation matrix, L is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and U is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix A to be factored.\nOn exit, the factors L and U from the factorization.\nThe unit diagonal elements of L are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension min(m,n).\\n\nThe vector of pivot indices. Elements of ipiv are 1-based indices.\nFor 1 <= i <= min(m,n), the row i of the\nmatrix was interchanged with row ipiv[i].\nMatrix P of the factorization can be derived from ipiv.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
+    #[doc = " @{\n\\brief GETF2 computes the LU factorization of a general m-by-n matrix A\nusing partial pivoting with row interchanges.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization has the form\n\n\\f[\nA = PLU\n\\f]\n\nwhere P is a permutation matrix, L is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and U is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix A to be factored.\nOn exit, the factors L and U from the factorization.\nThe unit diagonal elements of L are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension min(m,n).\nThe vector of pivot indices. Elements of ipiv are 1-based indices.\nFor 1 <= i <= min(m,n), the row i of the\nmatrix was interchanged with row ipiv[i].\nMatrix P of the factorization can be derived from ipiv.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
     pub fn rocsolver_sgetf2(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2639,7 +2658,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETF2_BATCHED computes the LU factorization of a batch of general\nm-by-n matrices using partial pivoting with row interchanges.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = P_jL_jU_j\n\\f]\n\nwhere \\f$P_j\\f$ is a permutation matrix, \\f$L_j\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_j\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the factors L_j and U_j from the factorizations.\nThe unit diagonal elements of L_j are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors of pivot indices ipiv_j (corresponding to A_j).\nDimension of ipiv_j is min(m,n).\nElements of ipiv_j are 1-based indices.\nFor each instance A_j in the batch and for 1 <= i <= min(m,n), the row i of the\nmatrix A_j was interchanged with row ipiv_j[i].\nMatrix P_j of the factorization can be derived from ipiv_j.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= min(m,n).\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETF2_BATCHED computes the LU factorization of a batch of general\nm-by-n matrices using partial pivoting with row interchanges.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = P_lL_lU_l\n\\f]\n\nwhere \\f$P_l\\f$ is a permutation matrix, \\f$L_l\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_l\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the factors L_l and U_l from the factorizations.\nThe unit diagonal elements of L_l are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors of pivot indices ipiv_l (corresponding to A_l).\nDimension of ipiv_l is min(m,n).\nElements of ipiv_l are 1-based indices.\nFor each instance A_l in the batch and for 1 <= i <= min(m,n), the row i of the\nmatrix A_l was interchanged with row ipiv_l[i].\nMatrix P_l of the factorization can be derived from ipiv_l.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= min(m,n).\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetf2_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2696,7 +2715,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETF2_STRIDED_BATCHED computes the LU factorization of a batch of\ngeneral m-by-n matrices using partial pivoting with row interchanges.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = P_jL_jU_j\n\\f]\n\nwhere \\f$P_j\\f$ is a permutation matrix, \\f$L_j\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_j\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the factors L_j and U_j from the factorization.\nThe unit diagonal elements of L_j are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\nipiv        pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors of pivots indices ipiv_j (corresponding to A_j).\nDimension of ipiv_j is min(m,n).\nElements of ipiv_j are 1-based indices.\nFor each instance A_j in the batch and for 1 <= i <= min(m,n), the row i of the\nmatrix A_j was interchanged with row ipiv_j[i].\nMatrix P_j of the factorization can be derived from ipiv_j.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= min(m,n).\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETF2_STRIDED_BATCHED computes the LU factorization of a batch of\ngeneral m-by-n matrices using partial pivoting with row interchanges.\n\n\\details\n(This is the unblocked Level-2-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with small and mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = P_lL_lU_l\n\\f]\n\nwhere \\f$P_l\\f$ is a permutation matrix, \\f$L_l\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_l\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the factors L_l and U_l from the factorization.\nThe unit diagonal elements of L_l are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\nipiv        pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors of pivot indices ipiv_l (corresponding to A_l).\nDimension of ipiv_l is min(m,n).\nElements of ipiv_l are 1-based indices.\nFor each instance A_l in the batch and for 1 <= i <= min(m,n), the row i of the\nmatrix A_l was interchanged with row ipiv_l[i].\nMatrix P_l of the factorization can be derived from ipiv_l.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= min(m,n).\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetf2_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2757,7 +2776,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRF computes the LU factorization of a general m-by-n matrix A\nusing partial pivoting with row interchanges.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization has the form\n\n\\f[\nA = PLU\n\\f]\n\nwhere P is a permutation matrix, L is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and U is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix A to be factored.\nOn exit, the factors L and U from the factorization.\nThe unit diagonal elements of L are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension min(m,n).\\n\nThe vector of pivot indices. Elements of ipiv are 1-based indices.\nFor 1 <= i <= min(m,n), the row i of the\nmatrix was interchanged with row ipiv[i].\nMatrix P of the factorization can be derived from ipiv.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
+    #[doc = " @{\n\\brief GETRF computes the LU factorization of a general m-by-n matrix A\nusing partial pivoting with row interchanges.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization has the form\n\n\\f[\nA = PLU\n\\f]\n\nwhere P is a permutation matrix, L is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and U is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix A to be factored.\nOn exit, the factors L and U from the factorization.\nThe unit diagonal elements of L are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension min(m,n).\nThe vector of pivot indices. Elements of ipiv are 1-based indices.\nFor 1 <= i <= min(m,n), the row i of the\nmatrix was interchanged with row ipiv[i].\nMatrix P of the factorization can be derived from ipiv.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
     pub fn rocsolver_sgetrf(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2806,7 +2825,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRF_BATCHED computes the LU factorization of a batch of general\nm-by-n matrices using partial pivoting with row interchanges.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = P_jL_jU_j\n\\f]\n\nwhere \\f$P_j\\f$ is a permutation matrix, \\f$L_j\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_j\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the factors L_j and U_j from the factorizations.\nThe unit diagonal elements of L_j are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors of pivot indices ipiv_j (corresponding to A_j).\nDimension of ipiv_j is min(m,n).\nElements of ipiv_j are 1-based indices.\nFor each instance A_j in the batch and for 1 <= i <= min(m,n), the row i of the\nmatrix A_j was interchanged with row ipiv_j[i].\nMatrix P_j of the factorization can be derived from ipiv_j.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= min(m,n).\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETRF_BATCHED computes the LU factorization of a batch of general\nm-by-n matrices using partial pivoting with row interchanges.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = P_lL_lU_l\n\\f]\n\nwhere \\f$P_l\\f$ is a permutation matrix, \\f$L_l\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_l\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the factors L_l and U_l from the factorizations.\nThe unit diagonal elements of L_l are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors of pivot indices ipiv_l (corresponding to A_l).\nDimension of ipiv_l is min(m,n).\nElements of ipiv_l are 1-based indices.\nFor each instance A_l in the batch and for 1 <= i <= min(m,n), the row i of the\nmatrix A_l was interchanged with row ipiv_l[i].\nMatrix P_l of the factorization can be derived from ipiv_l.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= min(m,n).\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetrf_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2863,7 +2882,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRF_STRIDED_BATCHED computes the LU factorization of a batch of\ngeneral m-by-n matrices using partial pivoting with row interchanges.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = P_jL_jU_j\n\\f]\n\nwhere \\f$P_j\\f$ is a permutation matrix, \\f$L_j\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_j\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the factors L_j and U_j from the factorization.\nThe unit diagonal elements of L_j are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\nipiv        pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors of pivots indices ipiv_j (corresponding to A_j).\nDimension of ipiv_j is min(m,n).\nElements of ipiv_j are 1-based indices.\nFor each instance A_j in the batch and for 1 <= i <= min(m,n), the row i of the\nmatrix A_j was interchanged with row ipiv_j[i].\nMatrix P_j of the factorization can be derived from ipiv_j.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= min(m,n).\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETRF_STRIDED_BATCHED computes the LU factorization of a batch of\ngeneral m-by-n matrices using partial pivoting with row interchanges.\n\n\\details\n(This is the blocked Level-3-BLAS version of the algorithm. An optimized internal implementation without rocBLAS calls\ncould be executed with mid-size matrices if optimizations are enabled (default option). For more details, see the\n\"Tuning rocSOLVER performance\" section of the Library Design Guide).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = P_lL_lU_l\n\\f]\n\nwhere \\f$P_l\\f$ is a permutation matrix, \\f$L_l\\f$ is lower triangular with unit\ndiagonal elements (lower trapezoidal if m > n), and \\f$U_l\\f$ is upper\ntriangular (upper trapezoidal if m < n).\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the factors L_l and U_l from the factorization.\nThe unit diagonal elements of L_l are not stored.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\nipiv        pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors of pivot indices ipiv_l (corresponding to A_l).\nDimension of ipiv_l is min(m,n).\nElements of ipiv_l are 1-based indices.\nFor each instance A_l in the batch and for 1 <= i <= min(m,n), the row i of the\nmatrix A_l was interchanged with row ipiv_l[i].\nMatrix P_l of the factorization can be derived from ipiv_l.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= min(m,n).\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetrf_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2924,7 +2943,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQR2 computes a QR factorization of a general m-by-n matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = Q\\left[\\begin{array}{c}\nR\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere R is upper triangular (upper trapezoidal if m < n), and Q is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_1H_2\\cdots H_k, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{ipiv}[i] \\cdot v_i v_i'\n\\f]\n\nwhere the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R; the elements below the diagonal are the last m - i elements\nof Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief GEQR2 computes a QR factorization of a general m-by-n matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = Q\\left[\\begin{array}{c}\nR\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere R is upper triangular (upper trapezoidal if m < n), and Q is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H(1)H(2)\\cdots H(k), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{ipiv}[i] \\cdot v_i^{} v_i'\n\\f]\n\nwhere the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R; the elements below the diagonal are the last m - i elements\nof Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars."]
     pub fn rocsolver_sgeqr2(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -2969,7 +2988,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQR2_BATCHED computes the QR factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = Q_j\\left[\\begin{array}{c}\nR_j\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere \\f$R_j\\f$ is upper triangular (upper trapezoidal if m < n), and \\f$Q_j\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_k}, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R_j. The elements below the diagonal are the last m - i elements\nof Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. 
batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEQR2_BATCHED computes the QR factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = Q_l\\left[\\begin{array}{c}\nR_l\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere \\f$R_l\\f$ is upper triangular (upper trapezoidal if m < n), and \\f$Q_l\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(1)H_l(2)\\cdots H_l(k), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R_l. The elements below the diagonal are the last m - i elements\nof Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeqr2_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3022,7 +3041,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQR2_STRIDED_BATCHED computes the QR factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = Q_j\\left[\\begin{array}{c}\nR_j\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere \\f$R_j\\f$ is upper triangular (upper trapezoidal if m < n), and \\f$Q_j\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_k}, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R_j. The elements below the diagonal are the last m - i elements\nof Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEQR2_STRIDED_BATCHED computes the QR factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = Q_l\\left[\\begin{array}{c}\nR_l\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere \\f$R_l\\f$ is upper triangular (upper trapezoidal if m < n), and \\f$Q_l\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(1)H_l(2)\\cdots H_l(k), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R_l. The elements below the diagonal are the last m - i elements\nof Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeqr2_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3079,7 +3098,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GERQ2 computes a RQ factorization of a general m-by-n matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = \\left[\\begin{array}{cc}\n0 & R\n\\end{array}\\right] Q\n\\f]\n\nwhere R is upper triangular (upper trapezoidal if m > n), and Q is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_1'H_2' \\cdots H_k', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{ipiv}[i] \\cdot v_i v_i'\n\\f]\n\nwhere the last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief GERQ2 computes a RQ factorization of a general m-by-n matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = \\left[\\begin{array}{cc}\n0 & R\n\\end{array}\\right] Q\n\\f]\n\nwhere R is upper triangular (upper trapezoidal if m > n), and Q is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H(1)'H(2)' \\cdots H(k)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{ipiv}[i] \\cdot v_i^{} v_i'\n\\f]\n\nwhere the last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars."]
     pub fn rocsolver_sgerq2(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3124,7 +3143,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GERQ2_BATCHED computes the RQ factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = \\left[\\begin{array}{cc}\n0 & R_j\n\\end{array}\\right] Q_j\n\\f]\n\nwhere \\f$R_j\\f$ is upper triangular (upper trapezoidal if m > n), and \\f$Q_j\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_1}'H_{j_2}' \\cdots H_{j_k}', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the last n-i elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R_j; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. 
Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GERQ2_BATCHED computes the RQ factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = \\left[\\begin{array}{cc}\n0 & R_l\n\\end{array}\\right] Q_l\n\\f]\n\nwhere \\f$R_l\\f$ is upper triangular (upper trapezoidal if m > n), and \\f$Q_l\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(1)'H_l(2)' \\cdots H_l(k)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the last n-i elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R_l; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. 
Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgerq2_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3177,7 +3196,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GERQ2_STRIDED_BATCHED computes the RQ factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = \\left[\\begin{array}{cc}\n0 & R_j\n\\end{array}\\right] Q_j\n\\f]\n\nwhere \\f$R_j\\f$ is upper triangular (upper trapezoidal if m > n), and \\f$Q_j\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_1}'H_{j_2}' \\cdots H_{j_k}', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the last n-i elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R_j; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GERQ2_STRIDED_BATCHED computes the RQ factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = \\left[\\begin{array}{cc}\n0 & R_l\n\\end{array}\\right] Q_l\n\\f]\n\nwhere \\f$R_l\\f$ is upper triangular (upper trapezoidal if m > n), and \\f$Q_l\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(1)'H_l(2)' \\cdots H_l(k)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the last n-i elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R_l; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgerq2_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3234,7 +3253,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQL2 computes a QL factorization of a general m-by-n matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = Q\\left[\\begin{array}{c}\n0\\\\\nL\n\\end{array}\\right]\n\\f]\n\nwhere L is lower triangular (lower trapezoidal if m < n), and Q is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{ipiv}[i] \\cdot v_i v_i'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief GEQL2 computes a QL factorization of a general m-by-n matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = Q\\left[\\begin{array}{c}\n0\\\\\nL\n\\end{array}\\right]\n\\f]\n\nwhere L is lower triangular (lower trapezoidal if m < n), and Q is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{ipiv}[i] \\cdot v_i^{} v_i'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars."]
     pub fn rocsolver_sgeql2(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3279,7 +3298,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQL2_BATCHED computes the QL factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = Q_j\\left[\\begin{array}{c}\n0\\\\\nL_j\n\\end{array}\\right]\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular (lower trapezoidal if m < n), and \\f$Q_j\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_{j_k}H_{j_{k-1}}\\cdots H_{j_1}, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L_j; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. 
Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEQL2_BATCHED computes the QL factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = Q_l\\left[\\begin{array}{c}\n0\\\\\nL_l\n\\end{array}\\right]\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular (lower trapezoidal if m < n), and \\f$Q_l\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(k)H_l(k-1)\\cdots H_l(1), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L_l; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. 
Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeql2_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3332,7 +3351,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQL2_STRIDED_BATCHED computes the QL factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = Q_j\\left[\\begin{array}{c}\n0\\\\\nL_j\n\\end{array}\\right]\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular (lower trapezoidal if m < n), and \\f$Q_j\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_{j_k}H_{j_{k-1}}\\cdots H_{j_1}, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L_j; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEQL2_STRIDED_BATCHED computes the QL factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = Q_l\\left[\\begin{array}{c}\n0\\\\\nL_l\n\\end{array}\\right]\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular (lower trapezoidal if m < n), and \\f$Q_l\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(k)H_l(k-1)\\cdots H_l(1), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L_l; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeql2_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3389,7 +3408,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GELQ2 computes a LQ factorization of a general m-by-n matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = \\left[\\begin{array}{cc}\nL & 0\n\\end{array}\\right] Q\n\\f]\n\nwhere L is lower triangular (lower trapezoidal if m > n), and Q is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_k'H_{k-1}' \\cdots H_1', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{ipiv}[i] \\cdot v_i' v_i\n\\f]\n\nwhere the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L; the elements above the diagonal are the last n - i elements\nof Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief GELQ2 computes a LQ factorization of a general m-by-n matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = \\left[\\begin{array}{cc}\nL & 0\n\\end{array}\\right] Q\n\\f]\n\nwhere L is lower triangular (lower trapezoidal if m > n), and Q is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H(k)'H(k-1)' \\cdots H(1)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{ipiv}[i] \\cdot v_i' v_i^{}\n\\f]\n\nwhere the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L; the elements above the diagonal are the last n - i elements\nof Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars."]
     pub fn rocsolver_sgelq2(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3434,7 +3453,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GELQ2_BATCHED computes the LQ factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = \\left[\\begin{array}{cc}\nL_j & 0\n\\end{array}\\right] Q_j\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular (lower trapezoidal if m > n), and \\f$Q_j\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_k}'H_{j_{k-1}}' \\cdots H_{j_1}', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i}' v_{j_i}\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L_j. The elements above the diagonal are the last n - i elements\nof Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. 
batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GELQ2_BATCHED computes the LQ factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = \\left[\\begin{array}{cc}\nL_l & 0\n\\end{array}\\right] Q_l\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular (lower trapezoidal if m > n), and \\f$Q_l\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(k)'H_l(k-1)' \\cdots H_l(1)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}' v_{l_i}^{}\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L_l. The elements above the diagonal are the last n - i elements\nof Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgelq2_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3487,7 +3506,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GELQ2_STRIDED_BATCHED computes the LQ factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = \\left[\\begin{array}{cc}\nL_j & 0\n\\end{array}\\right] Q_j\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular (lower trapezoidal if m > n), and \\f$Q_j\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_k}'H_{j_{k-1}}' \\cdots H_{j_1}', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i}' v_{j_i}\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle    rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L_j. The elements above the diagonal are the last n - i elements\nof Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GELQ2_STRIDED_BATCHED computes the LQ factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = \\left[\\begin{array}{cc}\nL_l & 0\n\\end{array}\\right] Q_l\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular (lower trapezoidal if m > n), and \\f$Q_l\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(k)'H_l(k-1)' \\cdots H_l(1)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}' v_{l_i}^{}\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle    rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L_l. The elements above the diagonal are the last n - i elements\nof Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgelq2_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3544,7 +3563,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQRF computes a QR factorization of a general m-by-n matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = Q\\left[\\begin{array}{c}\nR\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere R is upper triangular (upper trapezoidal if m < n), and Q is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_1H_2\\cdots H_k, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{ipiv}[i] \\cdot v_i v_i'\n\\f]\n\nwhere the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R; the elements below the diagonal are the last m - i elements\nof Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief GEQRF computes a QR factorization of a general m-by-n matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = Q\\left[\\begin{array}{c}\nR\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere R is upper triangular (upper trapezoidal if m < n), and Q is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H(1)H(2)\\cdots H(k), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{ipiv}[i] \\cdot v_i^{} v_i'\n\\f]\n\nwhere the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R; the elements below the diagonal are the last m - i elements\nof Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars."]
     pub fn rocsolver_sgeqrf(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3589,7 +3608,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQRF_BATCHED computes the QR factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = Q_j\\left[\\begin{array}{c}\nR_j\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere \\f$R_j\\f$ is upper triangular (upper trapezoidal if m < n), and \\f$Q_j\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_k}, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R_j. The elements below the diagonal are the last m - i elements\nof Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. 
batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEQRF_BATCHED computes the QR factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = Q_l\\left[\\begin{array}{c}\nR_l\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere \\f$R_l\\f$ is upper triangular (upper trapezoidal if m < n), and \\f$Q_l\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(1)H_l(2)\\cdots H_l(k), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R_l. The elements below the diagonal are the last m - i elements\nof Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeqrf_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3642,7 +3661,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQRF_STRIDED_BATCHED computes the QR factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = Q_j\\left[\\begin{array}{c}\nR_j\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere \\f$R_j\\f$ is upper triangular (upper trapezoidal if m < n), and \\f$Q_j\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_k}, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R_j. The elements below the diagonal are the last m - i elements\nof Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEQRF_STRIDED_BATCHED computes the QR factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = Q_l\\left[\\begin{array}{c}\nR_l\\\\\n0\n\\end{array}\\right]\n\\f]\n\nwhere \\f$R_l\\f$ is upper triangular (upper trapezoidal if m < n), and \\f$Q_l\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(1)H_l(2)\\cdots H_l(k), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and above the diagonal contain the\nfactor R_l. The elements below the diagonal are the last m - i elements\nof Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeqrf_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3699,7 +3718,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GERQF computes a RQ factorization of a general m-by-n matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = \\left[\\begin{array}{cc}\n0 & R\n\\end{array}\\right] Q\n\\f]\n\nwhere R is upper triangular (upper trapezoidal if m > n), and Q is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_1'H_2' \\cdots H_k', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{ipiv}[i] \\cdot v_i v_i'\n\\f]\n\nwhere the last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief GERQF computes a RQ factorization of a general m-by-n matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = \\left[\\begin{array}{cc}\n0 & R\n\\end{array}\\right] Q\n\\f]\n\nwhere R is upper triangular (upper trapezoidal if m > n), and Q is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H(1)'H(2)' \\cdots H(k)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{ipiv}[i] \\cdot v_i^{} v_i'\n\\f]\n\nwhere the last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars."]
     pub fn rocsolver_sgerqf(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3744,7 +3763,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GERQF_BATCHED computes the RQ factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = \\left[\\begin{array}{cc}\n0 & R_j\n\\end{array}\\right] Q_j\n\\f]\n\nwhere \\f$R_j\\f$ is upper triangular (upper trapezoidal if m > n), and \\f$Q_j\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_1}'H_{j_2}' \\cdots H_{j_k}', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the last n-i elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R_j; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. 
Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GERQF_BATCHED computes the RQ factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = \\left[\\begin{array}{cc}\n0 & R_l\n\\end{array}\\right] Q_l\n\\f]\n\nwhere \\f$R_l\\f$ is upper triangular (upper trapezoidal if m > n), and \\f$Q_l\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(1)'H_l(2)' \\cdots H_l(k)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the last n-i elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R_l; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. 
Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgerqf_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3797,7 +3816,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GERQF_STRIDED_BATCHED computes the RQ factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = \\left[\\begin{array}{cc}\n0 & R_j\n\\end{array}\\right] Q_j\n\\f]\n\nwhere \\f$R_j\\f$ is upper triangular (upper trapezoidal if m > n), and \\f$Q_j\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_1}'H_{j_2}' \\cdots H_{j_k}', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the last n-i elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R_j; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GERQF_STRIDED_BATCHED computes the RQ factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = \\left[\\begin{array}{cc}\n0 & R_l\n\\end{array}\\right] Q_l\n\\f]\n\nwhere \\f$R_l\\f$ is upper triangular (upper trapezoidal if m > n), and \\f$Q_l\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(1)'H_l(2)' \\cdots H_l(k)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the last n-i elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and above the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor R_l; the elements below the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgerqf_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3854,7 +3873,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQLF computes a QL factorization of a general m-by-n matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = Q\\left[\\begin{array}{c}\n0\\\\\nL\n\\end{array}\\right]\n\\f]\n\nwhere L is lower triangular (lower trapezoidal if m < n), and Q is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_kH_{k-1}\\cdots H_1, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{ipiv}[i] \\cdot v_i v_i'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief GEQLF computes a QL factorization of a general m-by-n matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = Q\\left[\\begin{array}{c}\n0\\\\\nL\n\\end{array}\\right]\n\\f]\n\nwhere L is lower triangular (lower trapezoidal if m < n), and Q is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H(k)H(k-1)\\cdots H(1), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{ipiv}[i] \\cdot v_i^{} v_i'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars."]
     pub fn rocsolver_sgeqlf(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3899,7 +3918,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQLF_BATCHED computes the QL factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = Q_j\\left[\\begin{array}{c}\n0\\\\\nL_j\n\\end{array}\\right]\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular (lower trapezoidal if m < n), and \\f$Q_j\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_{j_k}H_{j_{k-1}}\\cdots H_{j_1}, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L_j; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. 
Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEQLF_BATCHED computes the QL factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = Q_l\\left[\\begin{array}{c}\n0\\\\\nL_l\n\\end{array}\\right]\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular (lower trapezoidal if m < n), and \\f$Q_l\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(k)H_l(k-1)\\cdots H_l(1), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L_l; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. 
Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeqlf_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -3952,7 +3971,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEQLF_STRIDED_BATCHED computes the QL factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = Q_j\\left[\\begin{array}{c}\n0\\\\\nL_j\n\\end{array}\\right]\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular (lower trapezoidal if m < n), and \\f$Q_j\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_{j_k}H_{j_{k-1}}\\cdots H_{j_1}, \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i} v_{j_i}'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L_j; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEQLF_STRIDED_BATCHED computes the QL factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = Q_l\\left[\\begin{array}{c}\n0\\\\\nL_l\n\\end{array}\\right]\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular (lower trapezoidal if m < n), and \\f$Q_l\\f$ is\na m-by-m orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(k)H_l(k-1)\\cdots H_l(1), \\quad \\text{with} \\: k = \\text{min}(m,n)\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}'\n\\f]\n\nwhere the last m-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and below the (m-n)-th subdiagonal (when\nm >= n) or the (n-m)-th superdiagonal (when n > m) contain the\nfactor L_l; the elements above the sub/superdiagonal are the first i - 1\nelements of Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeqlf_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -4009,7 +4028,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GELQF computes a LQ factorization of a general m-by-n matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = \\left[\\begin{array}{cc}\nL & 0\n\\end{array}\\right] Q\n\\f]\n\nwhere L is lower triangular (lower trapezoidal if m > n), and Q is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H_k'H_{k-1}' \\cdots H_1', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{ipiv}[i] \\cdot v_i' v_i\n\\f]\n\nwhere the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L; the elements above the diagonal are the last n - i elements\nof Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief GELQF computes a LQ factorization of a general m-by-n matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\nA = \\left[\\begin{array}{cc}\nL & 0\n\\end{array}\\right] Q\n\\f]\n\nwhere L is lower triangular (lower trapezoidal if m > n), and Q is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ = H(k)'H(k-1)' \\cdots H(1)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{ipiv}[i] \\cdot v_i' v_i^{}\n\\f]\n\nwhere the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L; the elements above the diagonal are the last n - i elements\nof Householder vector v_i.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars."]
     pub fn rocsolver_sgelqf(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -4054,7 +4073,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GELQF_BATCHED computes the LQ factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = \\left[\\begin{array}{cc}\nL_j & 0\n\\end{array}\\right] Q_j\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular (lower trapezoidal if m > n), and \\f$Q_j\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_k}'H_{j_{k-1}}' \\cdots H_{j_1}', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i}' v_{j_i}\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L_j. The elements above the diagonal are the last n - i elements\nof Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. 
batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GELQF_BATCHED computes the LQ factorization of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = \\left[\\begin{array}{cc}\nL_l & 0\n\\end{array}\\right] Q_l\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular (lower trapezoidal if m > n), and \\f$Q_l\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(k)'H_l(k-1)' \\cdots H_l(1)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}' v_{l_i}^{}\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L_l. The elements above the diagonal are the last n - i elements\nof Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgelqf_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -4107,7 +4126,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GELQF_STRIDED_BATCHED computes the LQ factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form\n\n\\f[\nA_j = \\left[\\begin{array}{cc}\nL_j & 0\n\\end{array}\\right] Q_j\n\\f]\n\nwhere \\f$L_j\\f$ is lower triangular (lower trapezoidal if m > n), and \\f$Q_j\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_j = H_{j_k}'H_{j_{k-1}}' \\cdots H_{j_1}', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{ipiv}_j[i] \\cdot v_{j_i}' v_{j_i}\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L_j. The elements above the diagonal are the last n - i elements\nof Householder vector v_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GELQF_STRIDED_BATCHED computes the LQ factorization of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form\n\n\\f[\nA_l = \\left[\\begin{array}{cc}\nL_l & 0\n\\end{array}\\right] Q_l\n\\f]\n\nwhere \\f$L_l\\f$ is lower triangular (lower trapezoidal if m > n), and \\f$Q_l\\f$ is\na n-by-n orthogonal/unitary matrix represented as the product of Householder matrices\n\n\\f[\nQ_l = H_l(k)'H_l(k-1)' \\cdots H_l(1)', \\quad \\text{with} \\: k = \\text{min}(m,n).\n\\f]\n\nEach Householder matrices \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{ipiv}_l^{}[i] \\cdot v_{l_i}' v_{l_i}^{}\n\\f]\n\nwhere the first i-1 elements of Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on and below the diagonal contain the\nfactor L_l. The elements above the diagonal are the last n - i elements\nof Householder vector v_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgelqf_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -4164,7 +4183,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBD2 computes the bidiagonal form of a general m-by-n matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe bidiagonal form is given by:\n\n\\f[\nB = Q'  A  P\n\\f]\n\nwhere B is upper bidiagonal if m >= n and lower bidiagonal if m < n, and Q and\nP are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H_1H_2\\cdots H_n\\:  \\text{and} \\: P = G_1G_2\\cdots G_{n-1}, & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ = H_1H_2\\cdots H_{m-1}\\:  \\text{and} \\: P = G_1G_2\\cdots G_{m}, & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ and \\f$G_i\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_i = I - \\text{tauq}[i] \\cdot v_i v_i', & \\: \\text{and}\\\\\nG_i = I - \\text{taup}[i] \\cdot u_i' u_i.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_i\\f$ are zero, and \\f$u_i[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_i\\f$ are zero, and \\f$u_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. 
Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_i, and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_i.\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_i, and the elements above the\ndiagonal are the last n - i elements of Householder vector u_i.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nspecifies the leading dimension of A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension min(m,n).\\n\nThe diagonal elements of B.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension min(m,n)-1.\\n\nThe off-diagonal elements of B.\n@param[out]\ntauq        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars associated with matrix Q.\n@param[out]\ntaup        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars associated with matrix P."]
+    #[doc = " @{\n\\brief GEBD2 computes the bidiagonal form of a general m-by-n matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe bidiagonal form is given by:\n\n\\f[\nB = Q'  A  P\n\\f]\n\nwhere B is upper bidiagonal if m >= n and lower bidiagonal if m < n, and Q and\nP are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H(1)H(2)\\cdots H(n)\\:  \\text{and} \\: P = G(1)G(2)\\cdots G(n-1), & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ = H(1)H(2)\\cdots H(m-1)\\:  \\text{and} \\: P = G(1)G(2)\\cdots G(m), & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ and \\f$G(i)\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH(i) = I - \\text{tauq}[i] \\cdot v_i^{} v_i', & \\: \\text{and}\\\\\nG(i) = I - \\text{taup}[i] \\cdot u_i' u_i^{}.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_i\\f$ are zero, and \\f$u_i[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_i\\f$ are zero, and \\f$u_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. 
Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_i, and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_i.\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_i, and the elements above the\ndiagonal are the last n - i elements of Householder vector u_i.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension min(m,n).\nThe diagonal elements of B.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension min(m,n)-1.\nThe off-diagonal elements of B.\n@param[out]\ntauq        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars associated with matrix Q.\n@param[out]\ntaup        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars associated with matrix P."]
     pub fn rocsolver_sgebd2(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -4221,7 +4240,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBD2_BATCHED computes the bidiagonal form of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the bidiagonal form is given by:\n\n\\f[\nB_j = Q_j'  A_j  P_j\n\\f]\n\nwhere \\f$B_j\\f$ is upper bidiagonal if m >= n and lower bidiagonal if m < n, and \\f$Q_j\\f$ and\n\\f$P_j\\f$ are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_n}\\:  \\text{and} \\: P_j = G_{j_1}G_{j_2}\\cdots G_{j_{n-1}}, & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{m-1}}\\:  \\text{and} \\: P_j = G_{j_1}G_{j_2}\\cdots G_{j_m}, & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ and \\f$G_{j_i}\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_{j_i} = I - \\text{tauq}_j[i] \\cdot v_{j_i} v_{j_i}', & \\: \\text{and}\\\\\nG_{j_i} = I - \\text{taup}_j[i] \\cdot u_{j_i}' u_{j_i}.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_{j_i}\\f$ are zero, and \\f$u_{j_i}[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_{j_i}\\f$ are zero, and \\f$u_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B_j.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_(j_i), and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_(j_i).\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_(j_i), and the elements above the\ndiagonal are the last n - i elements of Householder vector u_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of B_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= min(m,n).\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of B_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= min(m,n)-1.\n@param[out]\ntauq        pointer to type. Array on the GPU (the size depends on the value of strideQ).\\n\nContains the vectors tauq_j of Householder scalars associated with matrices Q_j.\n@param[in]\nstrideQ     rocblas_stride.\\n\nStride from the start of one vector tauq_j to the next one tauq_(j+1).\nThere is no restriction for the value\nof strideQ. Normal use is strideQ >= min(m,n).\n@param[out]\ntaup        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors taup_j of Householder scalars associated with matrices P_j.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector taup_j to the next one taup_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEBD2_BATCHED computes the bidiagonal form of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the bidiagonal form is given by:\n\n\\f[\nB_l^{} = Q_l'  A_l^{}  P_l^{}\n\\f]\n\nwhere \\f$B_l\\f$ is upper bidiagonal if m >= n and lower bidiagonal if m < n, and \\f$Q_l\\f$ and\n\\f$P_l\\f$ are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n)\\:  \\text{and} \\: P_l = G_l(1)G_l(2)\\cdots G_l(n-1), & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ_l = H_l(1)H_l(2)\\cdots H_l(m-1)\\:  \\text{and} \\: P_l = G_l(1)G_l(2)\\cdots G_l(m), & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ and \\f$G_l(i)\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_l^{}(i) = I - \\text{tauq}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}', & \\: \\text{and}\\\\\nG_l^{}(i) = I - \\text{taup}_l^{}[i] \\cdot u_{l_i}' u_{l_i}^{}.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_{l_i}\\f$ are zero, and \\f$u_{l_i}[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_{l_i}\\f$ are zero, and \\f$u_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B_l.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_(l_i), and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_(l_i).\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_(l_i), and the elements above the\ndiagonal are the last n - i elements of Householder vector u_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of B_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= min(m,n).\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of B_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= min(m,n)-1.\n@param[out]\ntauq        pointer to type. Array on the GPU (the size depends on the value of strideQ).\nContains the vectors tauq_l of Householder scalars associated with matrices Q_l.\n@param[in]\nstrideQ     rocblas_stride.\nStride from the start of one vector tauq_l to the next one tauq_(l+1).\nThere is no restriction for the value\nof strideQ. Normal use is strideQ >= min(m,n).\n@param[out]\ntaup        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors taup_l of Householder scalars associated with matrices P_l.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector taup_l to the next one taup_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgebd2_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -4298,7 +4317,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBD2_STRIDED_BATCHED computes the bidiagonal form of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the bidiagonal form is given by:\n\n\\f[\nB_j = Q_j'  A_j  P_j\n\\f]\n\nwhere \\f$B_j\\f$ is upper bidiagonal if m >= n and lower bidiagonal if m < n, and \\f$Q_j\\f$ and\n\\f$P_j\\f$ are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_n}\\:  \\text{and} \\: P_j = G_{j_1}G_{j_2}\\cdots G_{j_{n-1}}, & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{m-1}}\\:  \\text{and} \\: P_j = G_{j_1}G_{j_2}\\cdots G_{j_m}, & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ and \\f$G_{j_i}\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_{j_i} = I - \\text{tauq}_j[i] \\cdot v_{j_i} v_{j_i}', & \\: \\text{and}\\\\\nG_{j_i} = I - \\text{taup}_j[i] \\cdot u_{j_i}' u_{j_i}.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_{j_i}\\f$ are zero, and \\f$u_{j_i}[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_{j_i}\\f$ are zero, and \\f$u_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B_j.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_(j_i), and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_(j_i).\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_(j_i), and the elements above the\ndiagonal are the last n - i elements of Householder vector u_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of B_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= min(m,n).\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of B_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= min(m,n)-1.\n@param[out]\ntauq        pointer to type. 
Array on the GPU (the size depends on the value of strideQ).\\n\nContains the vectors tauq_j of Householder scalars associated with matrices Q_j.\n@param[in]\nstrideQ     rocblas_stride.\\n\nStride from the start of one vector tauq_j to the next one tauq_(j+1).\nThere is no restriction for the value\nof strideQ. Normal use is strideQ >= min(m,n).\n@param[out]\ntaup        pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors taup_j of Householder scalars associated with matrices P_j.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector taup_j to the next one taup_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEBD2_STRIDED_BATCHED computes the bidiagonal form of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the bidiagonal form is given by:\n\n\\f[\nB_l^{} = Q_l'  A_l^{}  P_l^{}\n\\f]\n\nwhere \\f$B_l\\f$ is upper bidiagonal if m >= n and lower bidiagonal if m < n, and \\f$Q_l\\f$ and\n\\f$P_l\\f$ are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n)\\:  \\text{and} \\: P_l = G_l(1)G_l(2)\\cdots G_l(n-1), & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ_l = H_l(1)H_l(2)\\cdots H_l(m-1)\\:  \\text{and} \\: P_l = G_l(1)G_l(2)\\cdots G_l(m), & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ and \\f$G_l(i)\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_l^{}(i) = I - \\text{tauq}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}', & \\: \\text{and}\\\\\nG_l^{}(i) = I - \\text{taup}_l^{}[i] \\cdot u_{l_i}' u_{l_i}^{}.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_{l_i}\\f$ are zero, and \\f$u_{l_i}[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_{l_i}\\f$ are zero, and \\f$u_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B_l.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_(l_i), and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_(l_i).\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_(l_i), and the elements above the\ndiagonal are the last n - i elements of Householder vector u_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of B_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= min(m,n).\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of B_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= min(m,n)-1.\n@param[out]\ntauq        pointer to type. Array on the GPU (the size depends on the value of strideQ).\nContains the vectors tauq_l of Householder scalars associated with matrices Q_l.\n@param[in]\nstrideQ     rocblas_stride.\nStride from the start of one vector tauq_l to the next one tauq_(l+1).\nThere is no restriction for the value\nof strideQ. 
Normal use is strideQ >= min(m,n).\n@param[out]\ntaup        pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors taup_l of Householder scalars associated with matrices P_l.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector taup_l to the next one taup_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgebd2_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -4379,7 +4398,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBRD computes the bidiagonal form of a general m-by-n matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe bidiagonal form is given by:\n\n\\f[\nB = Q'  A  P\n\\f]\n\nwhere B is upper bidiagonal if m >= n and lower bidiagonal if m < n, and Q and\nP are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H_1H_2\\cdots H_n\\:  \\text{and} \\: P = G_1G_2\\cdots G_{n-1}, & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ = H_1H_2\\cdots H_{m-1}\\:  \\text{and} \\: P = G_1G_2\\cdots G_{m}, & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ and \\f$G_i\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_i = I - \\text{tauq}[i] \\cdot v_i v_i', & \\: \\text{and}\\\\\nG_i = I - \\text{taup}[i] \\cdot u_i' u_i.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_i\\f$ are zero, and \\f$u_i[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_i\\f$ are zero, and \\f$u_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. 
Array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_i, and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_i.\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_i, and the elements above the\ndiagonal are the last n - i elements of Householder vector u_i.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nspecifies the leading dimension of A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension min(m,n).\\n\nThe diagonal elements of B.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension min(m,n)-1.\\n\nThe off-diagonal elements of B.\n@param[out]\ntauq        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars associated with matrix Q.\n@param[out]\ntaup        pointer to type. Array on the GPU of dimension min(m,n).\\n\nThe Householder scalars associated with matrix P."]
+    #[doc = " @{\n\\brief GEBRD computes the bidiagonal form of a general m-by-n matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe bidiagonal form is given by:\n\n\\f[\nB = Q'  A  P\n\\f]\n\nwhere B is upper bidiagonal if m >= n and lower bidiagonal if m < n, and Q and\nP are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H(1)H(2)\\cdots H(n)\\:  \\text{and} \\: P = G(1)G(2)\\cdots G(n-1), & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ = H(1)H(2)\\cdots H(m-1)\\:  \\text{and} \\: P = G(1)G(2)\\cdots G(m), & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ and \\f$G(i)\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH(i) = I - \\text{tauq}[i] \\cdot v_i^{} v_i', & \\: \\text{and}\\\\\nG(i) = I - \\text{taup}[i] \\cdot u_i' u_i^{}.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_i\\f$ are zero, and \\f$u_i[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_i\\f$ are zero, and \\f$u_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of the matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of the matrix A.\n@param[inout]\nA           pointer to type. 
Array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrix to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_i, and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_i.\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_i, and the elements above the\ndiagonal are the last n - i elements of Householder vector u_i.\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension min(m,n).\nThe diagonal elements of B.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension min(m,n)-1.\nThe off-diagonal elements of B.\n@param[out]\ntauq        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars associated with matrix Q.\n@param[out]\ntaup        pointer to type. Array on the GPU of dimension min(m,n).\nThe Householder scalars associated with matrix P."]
     pub fn rocsolver_sgebrd(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -4436,7 +4455,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBRD_BATCHED computes the bidiagonal form of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the bidiagonal form is given by:\n\n\\f[\nB_j = Q_j'  A_j  P_j\n\\f]\n\nwhere \\f$B_j\\f$ is upper bidiagonal if m >= n and lower bidiagonal if m < n, and \\f$Q_j\\f$ and\n\\f$P_j\\f$ are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_n}\\:  \\text{and} \\: P_j = G_{j_1}G_{j_2}\\cdots G_{j_{n-1}}, & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{m-1}}\\:  \\text{and} \\: P_j = G_{j_1}G_{j_2}\\cdots G_{j_m}, & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ and \\f$G_{j_i}\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_{j_i} = I - \\text{tauq}_j[i] \\cdot v_{j_i} v_{j_i}', & \\: \\text{and}\\\\\nG_{j_i} = I - \\text{taup}_j[i] \\cdot u_{j_i}' u_{j_i}.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_{j_i}\\f$ are zero, and \\f$u_{j_i}[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_{j_i}\\f$ are zero, and \\f$u_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B_j.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_(j_i), and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_(j_i).\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_(j_i), and the elements above the\ndiagonal are the last n - i elements of Householder vector u_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of B_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= min(m,n).\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of B_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= min(m,n)-1.\n@param[out]\ntauq        pointer to type. Array on the GPU (the size depends on the value of strideQ).\\n\nContains the vectors tauq_j of Householder scalars associated with matrices Q_j.\n@param[in]\nstrideQ     rocblas_stride.\\n\nStride from the start of one vector tauq_j to the next one tauq_(j+1).\nThere is no restriction for the value\nof strideQ. Normal use is strideQ >= min(m,n).\n@param[out]\ntaup        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors taup_j of Householder scalars associated with matrices P_j.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector taup_j to the next one taup_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEBRD_BATCHED computes the bidiagonal form of a batch of general\nm-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the bidiagonal form is given by:\n\n\\f[\nB_l^{} = Q_l'  A_l^{}  P_l^{}\n\\f]\n\nwhere \\f$B_l\\f$ is upper bidiagonal if m >= n and lower bidiagonal if m < n, and \\f$Q_l\\f$ and\n\\f$P_l\\f$ are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n)\\:  \\text{and} \\: P_l = G_l(1)G_l(2)\\cdots G_l(n-1), & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ_l = H_l(1)H_l(2)\\cdots H_l(m-1)\\:  \\text{and} \\: P_l = G_l(1)G_l(2)\\cdots G_l(m), & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ and \\f$G_l(i)\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_l^{}(i) = I - \\text{tauq}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}', & \\: \\text{and}\\\\\nG_l^{}(i) = I - \\text{taup}_l^{}[i] \\cdot u_{l_i}' u_{l_i}^{}.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_{l_i}\\f$ are zero, and \\f$u_{l_i}[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_{l_i}\\f$ are zero, and \\f$u_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B_l.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_(l_i), and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_(l_i).\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_(l_i), and the elements above the\ndiagonal are the last n - i elements of Householder vector u_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of B_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= min(m,n).\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of B_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= min(m,n)-1.\n@param[out]\ntauq        pointer to type. Array on the GPU (the size depends on the value of strideQ).\nContains the vectors tauq_l of Householder scalars associated with matrices Q_l.\n@param[in]\nstrideQ     rocblas_stride.\nStride from the start of one vector tauq_l to the next one tauq_(l+1).\nThere is no restriction for the value\nof strideQ. Normal use is strideQ >= min(m,n).\n@param[out]\ntaup        pointer to type. 
Array on the GPU (the size depends on the value of strideP).\nContains the vectors taup_l of Householder scalars associated with matrices P_l.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector taup_l to the next one taup_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgebrd_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -4513,7 +4532,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBRD_STRIDED_BATCHED computes the bidiagonal form of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the bidiagonal form is given by:\n\n\\f[\nB_j = Q_j'  A_j  P_j\n\\f]\n\nwhere \\f$B_j\\f$ is upper bidiagonal if m >= n and lower bidiagonal if m < n, and \\f$Q_j\\f$ and\n\\f$P_j\\f$ are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_n}\\:  \\text{and} \\: P_j = G_{j_1}G_{j_2}\\cdots G_{j_{n-1}}, & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{m-1}}\\:  \\text{and} \\: P_j = G_{j_1}G_{j_2}\\cdots G_{j_m}, & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ and \\f$G_{j_i}\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_{j_i} = I - \\text{tauq}_j[i] \\cdot v_{j_i} v_{j_i}', & \\: \\text{and}\\\\\nG_{j_i} = I - \\text{taup}_j[i] \\cdot u_{j_i}' u_{j_i}.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_{j_i}\\f$ are zero, and \\f$u_{j_i}[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_{j_i}\\f$ are zero, and \\f$u_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all the matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all the matrices A_j in the batch.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the m-by-n matrices A_j to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B_j.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_(j_i), and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_(j_i).\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_(j_i), and the elements above the\ndiagonal are the last n - i elements of Householder vector u_(j_i).\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of B_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= min(m,n).\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of B_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= min(m,n)-1.\n@param[out]\ntauq        pointer to type. 
Array on the GPU (the size depends on the value of strideQ).\\n\nContains the vectors tauq_j of Householder scalars associated with matrices Q_j.\n@param[in]\nstrideQ     rocblas_stride.\\n\nStride from the start of one vector tauq_j to the next one tauq_(j+1).\nThere is no restriction for the value\nof strideQ. Normal use is strideQ >= min(m,n).\n@param[out]\ntaup        pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors taup_j of Householder scalars associated with matrices P_j.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector taup_j to the next one taup_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEBRD_STRIDED_BATCHED computes the bidiagonal form of a batch of\ngeneral m-by-n matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the bidiagonal form is given by:\n\n\\f[\nB_l^{} = Q_l'  A_l^{}  P_l^{}\n\\f]\n\nwhere \\f$B_l\\f$ is upper bidiagonal if m >= n and lower bidiagonal if m < n, and \\f$Q_l\\f$ and\n\\f$P_l\\f$ are orthogonal/unitary matrices represented as the product of Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n)\\:  \\text{and} \\: P_l = G_l(1)G_l(2)\\cdots G_l(n-1), & \\: \\text{if}\\: m >= n, \\:\\text{or}\\\\\nQ_l = H_l(1)H_l(2)\\cdots H_l(m-1)\\:  \\text{and} \\: P_l = G_l(1)G_l(2)\\cdots G_l(m), & \\: \\text{if}\\: m < n.\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ and \\f$G_l(i)\\f$ is given by\n\n\\f[\n\\begin{array}{cl}\nH_l^{}(i) = I - \\text{tauq}_l^{}[i] \\cdot v_{l_i}^{} v_{l_i}', & \\: \\text{and}\\\\\nG_l^{}(i) = I - \\text{taup}_l^{}[i] \\cdot u_{l_i}' u_{l_i}^{}.\n\\end{array}\n\\f]\n\nIf m >= n, the first i-1 elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$;\nwhile the first i elements of the Householder vector \\f$u_{l_i}\\f$ are zero, and \\f$u_{l_i}[i+1] = 1\\f$.\nIf m < n, the first i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$;\nwhile the first i-1 elements of the Householder vector \\f$u_{l_i}\\f$ are zero, and \\f$u_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all the matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all the matrices A_l in the batch.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\nOn entry, the m-by-n matrices A_l to be factored.\nOn exit, the elements on the diagonal and superdiagonal (if m >= n), or\nsubdiagonal (if m < n) contain the bidiagonal form B_l.\nIf m >= n, the elements below the diagonal are the last m - i elements\nof Householder vector v_(l_i), and the elements above the\nsuperdiagonal are the last n - i - 1 elements of Householder vector u_(l_i).\nIf m < n, the elements below the subdiagonal are the last m - i - 1\nelements of Householder vector v_(l_i), and the elements above the\ndiagonal are the last n - i elements of Householder vector u_(l_i).\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of B_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= min(m,n).\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of B_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= min(m,n)-1.\n@param[out]\ntauq        pointer to type. Array on the GPU (the size depends on the value of strideQ).\nContains the vectors tauq_l of Householder scalars associated with matrices Q_l.\n@param[in]\nstrideQ     rocblas_stride.\nStride from the start of one vector tauq_l to the next one tauq_(l+1).\nThere is no restriction for the value\nof strideQ. 
Normal use is strideQ >= min(m,n).\n@param[out]\ntaup        pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors taup_l of Householder scalars associated with matrices P_l.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector taup_l to the next one taup_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= min(m,n).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgebrd_strided_batched(
         handle: rocblas_handle,
         m: rocblas_int,
@@ -4594,7 +4613,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRS solves a system of n linear equations on n variables in its factorized form.\n\n\\details\nIt solves one of the following systems, depending on the value of trans:\n\n\\f[\n\\begin{array}{cl}\nA X = B & \\: \\text{not transposed,}\\\\\nA^T X = B & \\: \\text{transposed, or}\\\\\nA^H X = B & \\: \\text{conjugate transposed.}\n\\end{array}\n\\f]\n\nMatrix A is defined by its triangular factors as returned by \\ref rocsolver_sgetrf \"GETRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies the form of the system of equations.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof the matrix B.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nThe factors L and U of the factorization A = P*L*U returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe pivot indices returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[in,out]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\\n\nOn entry, the right hand side matrix B.\nOn exit, the solution matrix X.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of B."]
+    #[doc = " @{\n\\brief GETRS solves a system of n linear equations on n variables in its factorized form.\n\n\\details\nIt solves one of the following systems, depending on the value of trans:\n\n\\f[\n\\begin{array}{cl}\nA X = B & \\: \\text{not transposed,}\\\\\nA^T X = B & \\: \\text{transposed, or}\\\\\nA^H X = B & \\: \\text{conjugate transposed.}\n\\end{array}\n\\f]\n\nMatrix A is defined by its triangular factors as returned by \\ref rocsolver_sgetrf \"GETRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\nSpecifies the form of the system of equations.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof the matrix B.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*n.\nThe factors L and U of the factorization A = P*L*U returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe pivot indices returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[inout]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\nOn entry, the right hand side matrix B.\nOn exit, the solution matrix X.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of B."]
     pub fn rocsolver_sgetrs(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -4651,7 +4670,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRS_BATCHED solves a batch of systems of n linear equations on n\nvariables in its factorized forms.\n\n\\details\nFor each instance j in the batch, it solves one of the following systems, depending on the value of trans:\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = B_j & \\: \\text{not transposed,}\\\\\nA_j^T X_j = B_j & \\: \\text{transposed, or}\\\\\nA_j^H X_j = B_j & \\: \\text{conjugate transposed.}\n\\end{array}\n\\f]\n\nMatrix \\f$A_j\\f$ is defined by its triangular factors as returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies the form of the system of equations of each instance in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of all A_j matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_j.\n@param[in]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nThe factors L_j and U_j of the factorization A_j = P_j*L_j*U_j returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of matrices A_j.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of pivot indices returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[in,out]\nB           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension ldb*nrhs.\\n\nOn entry, the right hand side matrices B_j.\nOn exit, the solution matrix X_j of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of matrices B_j.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of instances (systems) in the batch."]
+    #[doc = " @{\n\\brief GETRS_BATCHED solves a batch of systems of n linear equations on n\nvariables in its factorized forms.\n\n\\details\nFor each instance l in the batch, it solves one of the following systems, depending on the value of trans:\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = B_l & \\: \\text{not transposed,}\\\\\nA_l^T X_l^{} = B_l^{} & \\: \\text{transposed, or}\\\\\nA_l^H X_l^{} = B_l^{} & \\: \\text{conjugate transposed.}\n\\end{array}\n\\f]\n\nMatrix \\f$A_l\\f$ is defined by its triangular factors as returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\nSpecifies the form of the system of equations of each instance in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of all A_l matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_l.\n@param[in]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nThe factors L_l and U_l of the factorization A_l = P_l*L_l*U_l returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of matrices A_l.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of pivot indices returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[inout]\nB           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension ldb*nrhs.\nOn entry, the right hand side matrices B_l.\nOn exit, the solution matrix X_l of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of matrices B_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of instances (systems) in the batch."]
     pub fn rocsolver_sgetrs_batched(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -4716,7 +4735,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRS_STRIDED_BATCHED solves a batch of systems of n linear equations\non n variables in its factorized forms.\n\n\\details\nFor each instance j in the batch, it solves one of the following systems, depending on the value of trans:\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = B_j & \\: \\text{not transposed,}\\\\\nA_j^T X_j = B_j & \\: \\text{transposed, or}\\\\\nA_j^H X_j = B_j & \\: \\text{conjugate transposed.}\n\\end{array}\n\\f]\n\nMatrix \\f$A_j\\f$ is defined by its triangular factors as returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies the form of the system of equations of each instance in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of all A_j matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_j.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nThe factors L_j and U_j of the factorization A_j = P_j*L_j*U_j returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors ipiv_j of pivot indices returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. 
Normal use case is strideP >= n.\n@param[in,out]\nB           pointer to type. Array on the GPU (size depends on the value of strideB).\\n\nOn entry, the right hand side matrices B_j.\nOn exit, the solution matrix X_j of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of matrices B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of instances (systems) in the batch."]
+    #[doc = " @{\n\\brief GETRS_STRIDED_BATCHED solves a batch of systems of n linear equations\non n variables in its factorized forms.\n\n\\details\nFor each instance l in the batch, it solves one of the following systems, depending on the value of trans:\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = B_l & \\: \\text{not transposed,}\\\\\nA_l^T X_l^{} = B_l^{} & \\: \\text{transposed, or}\\\\\nA_l^H X_l^{} = B_l^{} & \\: \\text{conjugate transposed.}\n\\end{array}\n\\f]\n\nMatrix \\f$A_l\\f$ is defined by its triangular factors as returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\nSpecifies the form of the system of equations of each instance in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of all A_l matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_l.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nThe factors L_l and U_l of the factorization A_l = P_l*L_l*U_l returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\nContains the vectors ipiv_l of pivot indices returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. 
Normal use case is strideP >= n.\n@param[inout]\nB           pointer to type. Array on the GPU (size depends on the value of strideB).\nOn entry, the right hand side matrices B_l.\nOn exit, the solution matrix X_l of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of matrices B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of instances (systems) in the batch."]
     pub fn rocsolver_sgetrs_strided_batched(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -4789,7 +4808,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESV solves a general system of n linear equations on n variables.\n\n\\details\nThe linear system is of the form\n\n\\f[\nA X = B\n\\f]\n\nwhere A is a general n-by-n matrix. Matrix A is first factorized in triangular factors L and U\nusing \\ref rocsolver_sgetrf \"GETRF\"; then, the solution is computed with \\ref rocsolver_sgetrs \"GETRS\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof the matrix B.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A.\nOn exit, if info = 0, the factors L and U of the LU decomposition of A returned by\n\\ref rocsolver_sgetrf \"GETRF\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe pivot indices returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[in,out]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\\n\nOn entry, the right hand side matrix B.\nOn exit, the solution matrix X.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of B.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, U is singular, and the solution could not be computed.\nU[i,i] is the first zero element in the diagonal."]
+    #[doc = " @{\n\\brief GESV solves a general system of n linear equations on n variables.\n\n\\details\nThe linear system is of the form\n\n\\f[\nA X = B\n\\f]\n\nwhere A is a general n-by-n matrix. Matrix A is first factorized in triangular factors L and U\nusing \\ref rocsolver_sgetrf \"GETRF\"; then, the solution is computed with \\ref rocsolver_sgetrs \"GETRS\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof the matrix B.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A.\nOn exit, if info = 0, the factors L and U of the LU decomposition of A returned by\n\\ref rocsolver_sgetrf \"GETRF\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe pivot indices returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[inout]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\nOn entry, the right hand side matrix B.\nOn exit, the solution matrix X.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of B.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, U is singular, and the solution could not be computed.\nU[i,i] is the first zero element in the diagonal."]
     pub fn rocsolver_sgesv(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -4846,7 +4865,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESV_BATCHED solves a batch of general systems of n linear equations on n\nvariables.\n\n\\details\nThe linear systems are of the form\n\n\\f[\nA_j X_j = B_j\n\\f]\n\nwhere \\f$A_j\\f$ is a general n-by-n matrix. Matrix \\f$A_j\\f$ is first factorized in triangular factors \\f$L_j\\f$ and \\f$U_j\\f$\nusing \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\"; then, the solutions are computed with \\ref rocsolver_sgetrs_batched \"GETRS_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of all A_j matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_j.\n@param[in]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j.\nOn exit, if info_j = 0, the factors L_j and U_j of the LU decomposition of A_j returned by\n\\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\\n\nThe vectors ipiv_j of pivot indices returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[in,out]\nB           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*nrhs.\\n\nOn entry, the right hand side matrices B_j.\nOn exit, the solution matrix X_j of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of matrices B_j.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for A_j.\nIf info[i] = j > 0, U_i is singular, and the solution could not be computed.\nU_j[i,i] is the first zero element in the diagonal.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of instances (systems) in the batch."]
+    #[doc = " @{\n\\brief GESV_BATCHED solves a batch of general systems of n linear equations on n\nvariables.\n\n\\details\nThe linear systems are of the form\n\n\\f[\nA_l X_l = B_l\n\\f]\n\nwhere \\f$A_l\\f$ is a general n-by-n matrix. Matrix \\f$A_l\\f$ is first factorized in triangular factors \\f$L_l\\f$ and \\f$U_l\\f$\nusing \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\"; then, the solutions are computed with \\ref rocsolver_sgetrs_batched \"GETRS_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of all A_l matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l.\nOn exit, if info[l] = 0, the factors L_l and U_l of the LU decomposition of A_l returned by\n\\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\nThe vectors ipiv_l of pivot indices returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[inout]\nB           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*nrhs.\nOn entry, the right hand side matrices B_l.\nOn exit, the solution matrix X_l of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of matrices B_l.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for A_l.\nIf info[l] = i > 0, U_l is singular, and the solution could not be computed.\nU_l[i,i] is the first zero element in the diagonal.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of instances (systems) in the batch."]
     pub fn rocsolver_sgesv_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -4911,7 +4930,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESV_STRIDED_BATCHED solves a batch of general systems of n linear equations\non n variables.\n\n\\details\nThe linear systems are of the form\n\n\\f[\nA_j X_j = B_j\n\\f]\n\nwhere \\f$A_j\\f$ is a general n-by-n matrix. Matrix \\f$A_j\\f$ is first factorized in triangular factors \\f$L_j\\f$ and \\f$U_j\\f$\nusing \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\"; then, the solutions are computed with\n\\ref rocsolver_sgetrs_strided_batched \"GETRS_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of all A_j matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_j.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j.\nOn exit, if info_j = 0, the factors L_j and U_j of the LU decomposition of A_j returned by\n\\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\\n\nThe vectors ipiv_j of pivot indices returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[in,out]\nB           pointer to type. 
Array on the GPU (size depends on the value of strideB).\\n\nOn entry, the right hand side matrices B_j.\nOn exit, the solution matrix X_j of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of matrices B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for A_j.\nIf info[i] = j > 0, U_i is singular, and the solution could not be computed.\nU_j[i,i] is the first zero element in the diagonal.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of instances (systems) in the batch."]
+    #[doc = " @{\n\\brief GESV_STRIDED_BATCHED solves a batch of general systems of n linear equations\non n variables.\n\n\\details\nThe linear systems are of the form\n\n\\f[\nA_l X_l = B_l\n\\f]\n\nwhere \\f$A_l\\f$ is a general n-by-n matrix. Matrix \\f$A_l\\f$ is first factorized in triangular factors \\f$L_l\\f$ and \\f$U_l\\f$\nusing \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\"; then, the solutions are computed with\n\\ref rocsolver_sgetrs_strided_batched \"GETRS_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of all A_l matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l.\nOn exit, if info[l] = 0, the factors L_l and U_l of the LU decomposition of A_l returned by\n\\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\nThe vectors ipiv_l of pivot indices returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[inout]\nB           pointer to type. 
Array on the GPU (size depends on the value of strideB).\nOn entry, the right hand side matrices B_l.\nOn exit, the solution matrix X_l of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of matrices B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for A_l.\nIf info[l] = i > 0, U_l is singular, and the solution could not be computed.\nU_l[i,i] is the first zero element in the diagonal.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of instances (systems) in the batch."]
     pub fn rocsolver_sgesv_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -4984,7 +5003,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI inverts a general n-by-n matrix A using the LU factorization\ncomputed by \\ref rocsolver_sgetrf \"GETRF\".\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA^{-1}L = U^{-1}\n\\f]\n\nwhere L is the lower triangular factor of A with unit diagonal elements, and U is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the factors L and U of the factorization A = P*L*U returned by \\ref rocsolver_sgetrf \"GETRF\".\nOn exit, the inverse of A if info = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe pivot indices returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
+    #[doc = " @{\n\\brief GETRI inverts a general n-by-n matrix A using the LU factorization\ncomputed by \\ref rocsolver_sgetrf \"GETRF\".\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA^{-1}L = U^{-1}\n\\f]\n\nwhere L is the lower triangular factor of A with unit diagonal elements, and U is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the factors L and U of the factorization A = P*L*U returned by \\ref rocsolver_sgetrf \"GETRF\".\nOn exit, the inverse of A if info = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe pivot indices returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
     pub fn rocsolver_sgetri(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -5029,7 +5048,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_BATCHED inverts a batch of general n-by-n matrices using\nthe LU factorization computed by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n\n\\details\nThe inverse of matrix \\f$A_j\\f$ in the batch is computed by solving the linear system\n\n\\f[\nA_j^{-1} L_j = U_j^{-1}\n\\f]\n\nwhere \\f$L_j\\f$ is the lower triangular factor of \\f$A_j\\f$ with unit diagonal elements, and \\f$U_j\\f$ is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the factors L_j and U_j of the factorization A = P_j*L_j*U_j returned by\n\\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\nOn exit, the inverses of A_j if info[j] = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\\n\nThe pivot indices returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(i+j).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETRI_BATCHED inverts a batch of general n-by-n matrices using\nthe LU factorization computed by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n\n\\details\nThe inverse of matrix \\f$A_l\\f$ in the batch is computed by solving the linear system\n\n\\f[\nA_l^{-1} L_l^{} = U_l^{-1}\n\\f]\n\nwhere \\f$L_l\\f$ is the lower triangular factor of \\f$A_l\\f$ with unit diagonal elements, and \\f$U_l\\f$ is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the factors L_l and U_l of the factorization A_l = P_l*L_l*U_l returned by\n\\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\nOn exit, the inverses of A_l if info[l] = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\nThe pivot indices returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetri_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -5082,7 +5101,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_STRIDED_BATCHED inverts a batch of general n-by-n matrices\nusing the LU factorization computed by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n\n\\details\nThe inverse of matrix \\f$A_j\\f$ in the batch is computed by solving the linear system\n\n\\f[\nA_j^{-1} L_j = U_j^{-1}\n\\f]\n\nwhere \\f$L_j\\f$ is the lower triangular factor of \\f$A_j\\f$ with unit diagonal elements, and \\f$U_j\\f$ is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the factors L_j and U_j of the factorization A_j = P_j*L_j*U_j returned by\n\\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\nOn exit, the inverses of A_j if info[j] = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\\n\nThe pivot indices returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETRI_STRIDED_BATCHED inverts a batch of general n-by-n matrices\nusing the LU factorization computed by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n\n\\details\nThe inverse of matrix \\f$A_l\\f$ in the batch is computed by solving the linear system\n\n\\f[\nA_l^{-1} L_l^{} = U_l^{-1}\n\\f]\n\nwhere \\f$L_l\\f$ is the lower triangular factor of \\f$A_l\\f$ with unit diagonal elements, and \\f$U_l\\f$ is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the factors L_l and U_l of the factorization A_l = P_l*L_l*U_l returned by\n\\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\nOn exit, the inverses of A_l if info[l] = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\nThe pivot indices returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetri_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -5139,7 +5158,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_NPVT inverts a general n-by-n matrix A using the LU factorization\ncomputed by \\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\".\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA^{-1}L = U^{-1}\n\\f]\n\nwhere L is the lower triangular factor of A with unit diagonal elements, and U is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the factors L and U of the factorization A = L*U returned by \\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\".\nOn exit, the inverse of A if info = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
+    #[doc = " @{\n\\brief GETRI_NPVT inverts a general n-by-n matrix A using the LU factorization\ncomputed by \\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\".\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA^{-1}L = U^{-1}\n\\f]\n\nwhere L is the lower triangular factor of A with unit diagonal elements, and U is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the factors L and U of the factorization A = L*U returned by \\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\".\nOn exit, the inverse of A if info = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
     pub fn rocsolver_sgetri_npvt(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -5180,7 +5199,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_NPVT_BATCHED inverts a batch of general n-by-n matrices using\nthe LU factorization computed by \\ref rocsolver_sgetrf_npvt_batched \"GETRF_NPVT_BATCHED\".\n\n\\details\nThe inverse of matrix \\f$A_j\\f$ in the batch is computed by solving the linear system\n\n\\f[\nA_j^{-1} L_j = U_j^{-1}\n\\f]\n\nwhere \\f$L_j\\f$ is the lower triangular factor of \\f$A_j\\f$ with unit diagonal elements, and \\f$U_j\\f$ is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the factors L_j and U_j of the factorization A = L_j*U_j returned by\n\\ref rocsolver_sgetrf_npvt_batched \"GETRF_NPVT_BATCHED\".\nOn exit, the inverses of A_j if info[j] = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETRI_NPVT_BATCHED inverts a batch of general n-by-n matrices using\nthe LU factorization computed by \\ref rocsolver_sgetrf_npvt_batched \"GETRF_NPVT_BATCHED\".\n\n\\details\nThe inverse of matrix \\f$A_l\\f$ in the batch is computed by solving the linear system\n\n\\f[\nA_l^{-1} L_l^{} = U_l^{-1}\n\\f]\n\nwhere \\f$L_l\\f$ is the lower triangular factor of \\f$A_l\\f$ with unit diagonal elements, and \\f$U_l\\f$ is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the factors L_l and U_l of the factorization A_l = L_l*U_l returned by\n\\ref rocsolver_sgetrf_npvt_batched \"GETRF_NPVT_BATCHED\".\nOn exit, the inverses of A_l if info[l] = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetri_npvt_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -5225,7 +5244,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_NPVT_STRIDED_BATCHED inverts a batch of general n-by-n matrices\nusing the LU factorization computed by \\ref rocsolver_sgetrf_npvt_strided_batched \"GETRF_NPVT_STRIDED_BATCHED\".\n\n\\details\nThe inverse of matrix \\f$A_j\\f$ in the batch is computed by solving the linear system\n\n\\f[\nA_j^{-1} L_j = U_j^{-1}\n\\f]\n\nwhere \\f$L_j\\f$ is the lower triangular factor of \\f$A_j\\f$ with unit diagonal elements, and \\f$U_j\\f$ is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the factors L_j and U_j of the factorization A_j = L_j*U_j returned by\n\\ref rocsolver_sgetrf_npvt_strided_batched \"GETRF_NPVT_STRIDED_BATCHED\".\nOn exit, the inverses of A_j if info[j] = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETRI_NPVT_STRIDED_BATCHED inverts a batch of general n-by-n matrices\nusing the LU factorization computed by \\ref rocsolver_sgetrf_npvt_strided_batched \"GETRF_NPVT_STRIDED_BATCHED\".\n\n\\details\nThe inverse of matrix \\f$A_l\\f$ in the batch is computed by solving the linear system\n\n\\f[\nA_l^{-1} L_l^{} = U_l^{-1}\n\\f]\n\nwhere \\f$L_l\\f$ is the lower triangular factor of \\f$A_l\\f$ with unit diagonal elements, and \\f$U_l\\f$ is the\nupper triangular factor.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the factors L_l and U_l of the factorization A_l = L_l*U_l returned by\n\\ref rocsolver_sgetrf_npvt_strided_batched \"GETRF_NPVT_STRIDED_BATCHED\".\nOn exit, the inverses of A_l if info[l] = 0; otherwise undefined.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetri_npvt_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -5274,7 +5293,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GELS solves an overdetermined (or underdetermined) linear system defined by an m-by-n\nmatrix A, and a corresponding matrix B, using the QR factorization computed by \\ref rocsolver_sgeqrf \"GEQRF\" (or the LQ\nfactorization computed by \\ref rocsolver_sgelqf \"GELQF\").\n\n\\details\nDepending on the value of trans, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = B & \\: \\text{not transposed, or}\\\\\nA' X = B & \\: \\text{transposed if real, or conjugate transposed if complex}\n\\end{array}\n\\f]\n\nIf m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined\nand a least-squares solution approximating X is found by minimizing\n\n\\f[\n|| B - A  X || \\quad \\text{(or} \\: || B - A' X ||\\text{)}\n\\f]\n\nIf m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined\nand a unique solution for X is chosen such that \\f$|| X ||\\f$ is minimal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies the form of the system of equations.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of matrix A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of columns of matrices B and X;\ni.e., the columns on the right hand side.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A.\nOn exit, the QR (or LQ) factorization of A as returned by \\ref rocsolver_sgeqrf \"GEQRF\" (or \\ref rocsolver_sgelqf \"GELQF\").\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrix A.\n@param[inout]\nB           pointer to type. 
Array on the GPU of dimension ldb*nrhs.\\n\nOn entry, the matrix B.\nOn exit, when info = 0, B is overwritten by the solution vectors (and the residuals in\nthe overdetermined cases) stored as columns.\n@param[in]\nldb         rocblas_int. ldb >= max(m,n).\\n\nSpecifies the leading dimension of matrix B.\n@param[out]\ninfo        pointer to rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, the solution could not be computed because input matrix A is\nrank deficient; the i-th diagonal element of its triangular factor is zero."]
+    #[doc = " @{\n\\brief GELS solves an overdetermined (or underdetermined) linear system defined by an m-by-n\nmatrix A, and a corresponding matrix B, using the QR factorization computed by \\ref rocsolver_sgeqrf \"GEQRF\" (or the LQ\nfactorization computed by \\ref rocsolver_sgelqf \"GELQF\").\n\n\\details\nDepending on the value of trans, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = B & \\: \\text{not transposed, or}\\\\\nA' X = B & \\: \\text{transposed if real, or conjugate transposed if complex}\n\\end{array}\n\\f]\n\nIf m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined\nand a least-squares solution approximating X is found by minimizing\n\n\\f[\n|| B - A  X || \\quad \\text{(or} \\: || B - A' X ||\\text{)}\n\\f]\n\nIf m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined\nand a unique solution for X is chosen such that \\f$|| X ||\\f$ is minimal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\nSpecifies the form of the system of equations.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of matrix A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of columns of matrices B and X;\ni.e., the columns on the right hand side.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A.\nOn exit, the QR (or LQ) factorization of A as returned by \\ref rocsolver_sgeqrf \"GEQRF\" (or \\ref rocsolver_sgelqf \"GELQF\").\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrix A.\n@param[inout]\nB           pointer to type. 
Array on the GPU of dimension ldb*nrhs.\nOn entry, the matrix B.\nOn exit, when info = 0, B is overwritten by the solution vectors (and the residuals in\nthe overdetermined cases) stored as columns.\n@param[in]\nldb         rocblas_int. ldb >= max(m,n).\nSpecifies the leading dimension of matrix B.\n@param[out]\ninfo        pointer to rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, the solution could not be computed because input matrix A is\nrank deficient; the i-th diagonal element of its triangular factor is zero."]
     pub fn rocsolver_sgels(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -5335,7 +5354,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GELS_BATCHED solves a batch of overdetermined (or underdetermined) linear systems\ndefined by a set of m-by-n matrices \\f$A_j\\f$, and corresponding matrices \\f$B_j\\f$, using the\nQR factorizations computed by \\ref rocsolver_sgeqrf_batched \"GEQRF_BATCHED\" (or the LQ factorizations computed by \\ref rocsolver_sgelqf_batched \"GELQF_BATCHED\").\n\n\\details\nFor each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = B_j & \\: \\text{not transposed, or}\\\\\nA_j' X_j = B_j & \\: \\text{transposed if real, or conjugate transposed if complex}\n\\end{array}\n\\f]\n\nIf m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined\nand a least-squares solution approximating X_j is found by minimizing\n\n\\f[\n|| B_j - A_j  X_j || \\quad \\text{(or} \\: || B_j - A_j' X_j ||\\text{)}\n\\f]\n\nIf m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined\nand a unique solution for X_j is chosen such that \\f$|| X_j ||\\f$ is minimal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies the form of the system of equations.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of columns of all matrices B_j and X_j in the batch;\ni.e., the columns on the right hand side.\n@param[inout]\nA           array of pointer to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j.\nOn exit, the QR (or LQ) factorizations of A_j as returned by \\ref rocsolver_sgeqrf_batched \"GEQRF_BATCHED\"\n(or \\ref rocsolver_sgelqf_batched \"GELQF_BATCHED\").\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[inout]\nB           array of pointer to type. Each pointer points to an array on the GPU of dimension ldb*nrhs.\\n\nOn entry, the matrices B_j.\nOn exit, when info[j] = 0, B_j is overwritten by the solution vectors (and the residuals in\nthe overdetermined cases) stored as columns.\n@param[in]\nldb         rocblas_int. ldb >= max(m,n).\\n\nSpecifies the leading dimension of matrices B_j.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for solution of A_j.\nIf info[j] = i > 0, the solution of A_j could not be computed because input\nmatrix A_j is rank deficient; the i-th diagonal element of its triangular factor is zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GELS_BATCHED solves a batch of overdetermined (or underdetermined) linear systems\ndefined by a set of m-by-n matrices \\f$A_l\\f$, and corresponding matrices \\f$B_l\\f$, using the\nQR factorizations computed by \\ref rocsolver_sgeqrf_batched \"GEQRF_BATCHED\" (or the LQ factorizations computed by \\ref rocsolver_sgelqf_batched \"GELQF_BATCHED\").\n\n\\details\nFor each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = B_l & \\: \\text{not transposed, or}\\\\\nA_l' X_l^{} = B_l^{} & \\: \\text{transposed if real, or conjugate transposed if complex}\n\\end{array}\n\\f]\n\nIf m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined\nand a least-squares solution approximating X_l is found by minimizing\n\n\\f[\n|| B_l - A_l  X_l || \\quad \\text{(or} \\: || B_l^{} - A_l' X_l^{} ||\\text{)}\n\\f]\n\nIf m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined\nand a unique solution for X_l is chosen such that \\f$|| X_l ||\\f$ is minimal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\nSpecifies the form of the system of equations.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of columns of all matrices B_l and X_l in the batch;\ni.e., the columns on the right hand side.\n@param[inout]\nA           array of pointer to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l.\nOn exit, the QR (or LQ) factorizations of A_l as returned by \\ref rocsolver_sgeqrf_batched \"GEQRF_BATCHED\"\n(or \\ref rocsolver_sgelqf_batched \"GELQF_BATCHED\").\n@param[in]\nlda         rocblas_int. 
lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[inout]\nB           array of pointer to type. Each pointer points to an array on the GPU of dimension ldb*nrhs.\nOn entry, the matrices B_l.\nOn exit, when info[l] = 0, B_l is overwritten by the solution vectors (and the residuals in\nthe overdetermined cases) stored as columns.\n@param[in]\nldb         rocblas_int. ldb >= max(m,n).\nSpecifies the leading dimension of matrices B_l.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for solution of A_l.\nIf info[l] = i > 0, the solution of A_l could not be computed because input\nmatrix A_l is rank deficient; the i-th diagonal element of its triangular factor is zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgels_batched(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -5400,7 +5419,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GELS_STRIDED_BATCHED solves a batch of overdetermined (or underdetermined) linear\nsystems defined by a set of m-by-n matrices \\f$A_j\\f$, and corresponding matrices \\f$B_j\\f$,\nusing the QR factorizations computed by \\ref rocsolver_sgeqrf_strided_batched \"GEQRF_STRIDED_BATCHED\"\n(or the LQ factorizations computed by \\ref rocsolver_sgelqf_strided_batched \"GELQF_STRIDED_BATCHED\").\n\n\\details\nFor each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = B_j & \\: \\text{not transposed, or}\\\\\nA_j' X_j = B_j & \\: \\text{transposed if real, or conjugate transposed if complex}\n\\end{array}\n\\f]\n\nIf m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined\nand a least-squares solution approximating X_j is found by minimizing\n\n\\f[\n|| B_j - A_j  X_j || \\quad \\text{(or} \\: || B_j - A_j' X_j ||\\text{)}\n\\f]\n\nIf m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined\nand a unique solution for X_j is chosen such that \\f$|| X_j ||\\f$ is minimal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\\n\nSpecifies the form of the system of equations.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of columns of all matrices B_j and X_j in the batch;\ni.e., the columns on the right hand side.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j.\nOn exit, the QR (or LQ) factorizations of A_j as returned by \\ref rocsolver_sgeqrf_strided_batched \"GEQRF_STRIDED_BATCHED\"\n(or \\ref rocsolver_sgelqf_strided_batched \"GELQF_STRIDED_BATCHED\").\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[inout]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the matrices B_j.\nOn exit, when info[j] = 0, each B_j is overwritten by the solution vectors (and the residuals in\nthe overdetermined cases) stored as columns.\n@param[in]\nldb         rocblas_int. ldb >= max(m,n).\\n\nSpecifies the leading dimension of matrices B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for solution of A_j.\nIf info[j] = i > 0, the solution of A_j could not be computed because input\nmatrix A_j is rank deficient; the i-th diagonal element of its triangular factor is zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GELS_STRIDED_BATCHED solves a batch of overdetermined (or underdetermined) linear\nsystems defined by a set of m-by-n matrices \\f$A_l\\f$, and corresponding matrices \\f$B_l\\f$,\nusing the QR factorizations computed by \\ref rocsolver_sgeqrf_strided_batched \"GEQRF_STRIDED_BATCHED\"\n(or the LQ factorizations computed by \\ref rocsolver_sgelqf_strided_batched \"GELQF_STRIDED_BATCHED\").\n\n\\details\nFor each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = B_l & \\: \\text{not transposed, or}\\\\\nA_l' X_l^{} = B_l^{} & \\: \\text{transposed if real, or conjugate transposed if complex}\n\\end{array}\n\\f]\n\nIf m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined\nand a least-squares solution approximating X_l is found by minimizing\n\n\\f[\n|| B_l - A_l  X_l || \\quad \\text{(or} \\: || B_l^{} - A_l' X_l^{} ||\\text{)}\n\\f]\n\nIf m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined\nand a unique solution for X_l is chosen such that \\f$|| X_l ||\\f$ is minimal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\ntrans       rocblas_operation.\nSpecifies the form of the system of equations.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of columns of all matrices B_l and X_l in the batch;\ni.e., the columns on the right hand side.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l.\nOn exit, the QR (or LQ) factorizations of A_l as returned by \\ref rocsolver_sgeqrf_strided_batched \"GEQRF_STRIDED_BATCHED\"\n(or \\ref rocsolver_sgelqf_strided_batched \"GELQF_STRIDED_BATCHED\").\n@param[in]\nlda         rocblas_int. lda >= m.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[inout]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nOn entry, the matrices B_l.\nOn exit, when info[l] = 0, each B_l is overwritten by the solution vectors (and the residuals in\nthe overdetermined cases) stored as columns.\n@param[in]\nldb         rocblas_int. ldb >= max(m,n).\nSpecifies the leading dimension of matrices B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for solution of A_l.\nIf info[l] = i > 0, the solution of A_l could not be computed because input\nmatrix A_l is rank deficient; the i-th diagonal element of its triangular factor is zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgels_strided_batched(
         handle: rocblas_handle,
         trans: rocblas_operation,
@@ -5473,7 +5492,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTF2 computes the Cholesky factorization of a real symmetric (complex\nHermitian) positive definite matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form:\n\n\\f[\n\\begin{array}{cl}\nA = U'U & \\: \\text{if uplo is upper, or}\\\\\nA = LL' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nU is an upper triangular matrix and L is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A to be factored. On exit, the lower or upper triangular factor.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful factorization of matrix A.\nIf info = i > 0, the leading minor of order i of A is not positive definite.\nThe factorization stopped at this point."]
+    #[doc = " @{\n\\brief POTF2 computes the Cholesky factorization of a real symmetric (complex\nHermitian) positive definite matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form:\n\n\\f[\n\\begin{array}{cl}\nA = U'U & \\: \\text{if uplo is upper, or}\\\\\nA = LL' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nU is an upper triangular matrix and L is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A to be factored. On exit, the lower or upper triangular factor.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful factorization of matrix A.\nIf info = i > 0, the leading minor of order i of A is not positive definite.\nThe factorization stopped at this point."]
     pub fn rocsolver_spotf2(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5518,7 +5537,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTF2_BATCHED computes the Cholesky factorization of a\nbatch of real symmetric (complex Hermitian) positive definite matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form:\n\n\\f[\n\\begin{array}{cl}\nA_j = U_j'U_j & \\: \\text{if uplo is upper, or}\\\\\nA_j = L_jL_j' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\n\\f$U_j\\f$ is an upper triangular matrix and \\f$L_j\\f$ is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of matrix A_j.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j to be factored. On exit, the upper or lower triangular factors.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful factorization of matrix A_j.\nIf info[j] = i > 0, the leading minor of order i of A_j is not positive definite.\nThe j-th factorization stopped at this point.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief POTF2_BATCHED computes the Cholesky factorization of a\nbatch of real symmetric (complex Hermitian) positive definite matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form:\n\n\\f[\n\\begin{array}{cl}\nA_l^{} = U_l'U_l^{} & \\: \\text{if uplo is upper, or}\\\\\nA_l^{} = L_l^{}L_l' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\n\\f$U_l\\f$ is an upper triangular matrix and \\f$L_l\\f$ is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of matrix A_l.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l to be factored. On exit, the upper or lower triangular factors.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful factorization of matrix A_l.\nIf info[l] = i > 0, the leading minor of order i of A_l is not positive definite.\nThe l-th factorization stopped at this point.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_spotf2_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5567,7 +5586,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTF2_STRIDED_BATCHED computes the Cholesky factorization of a\nbatch of real symmetric (complex Hermitian) positive definite matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form:\n\n\\f[\n\\begin{array}{cl}\nA_j = U_j'U_j & \\: \\text{if uplo is upper, or}\\\\\nA_j = L_jL_j' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\n\\f$U_j\\f$ is an upper triangular matrix and \\f$L_j\\f$ is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of matrix A_j.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j to be factored. On exit, the upper or lower triangular factors.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA    rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful factorization of matrix A_j.\nIf info[j] = i > 0, the leading minor of order i of A_j is not positive definite.\nThe j-th factorization stopped at this point.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief POTF2_STRIDED_BATCHED computes the Cholesky factorization of a\nbatch of real symmetric (complex Hermitian) positive definite matrices.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form:\n\n\\f[\n\\begin{array}{cl}\nA_l^{} = U_l'U_l^{} & \\: \\text{if uplo is upper, or}\\\\\nA_l^{} = L_l^{}L_l' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\n\\f$U_l\\f$ is an upper triangular matrix and \\f$L_l\\f$ is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of matrix A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l to be factored. On exit, the upper or lower triangular factors.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA    rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful factorization of matrix A_l.\nIf info[l] = i > 0, the leading minor of order i of A_l is not positive definite.\nThe l-th factorization stopped at this point.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_spotf2_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5620,7 +5639,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTRF computes the Cholesky factorization of a real symmetric (complex\nHermitian) positive definite matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form:\n\n\\f[\n\\begin{array}{cl}\nA = U'U & \\: \\text{if uplo is upper, or}\\\\\nA = LL' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nU is an upper triangular matrix and L is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A to be factored. On exit, the lower or upper triangular factor.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful factorization of matrix A.\nIf info = i > 0, the leading minor of order i of A is not positive definite.\nThe factorization stopped at this point."]
+    #[doc = " @{\n\\brief POTRF computes the Cholesky factorization of a real symmetric (complex\nHermitian) positive definite matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form:\n\n\\f[\n\\begin{array}{cl}\nA = U'U & \\: \\text{if uplo is upper, or}\\\\\nA = LL' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nU is an upper triangular matrix and L is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A to be factored. On exit, the lower or upper triangular factor.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful factorization of matrix A.\nIf info = i > 0, the leading minor of order i of A is not positive definite.\nThe factorization stopped at this point."]
     pub fn rocsolver_spotrf(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5665,7 +5684,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTRF_BATCHED computes the Cholesky factorization of a\nbatch of real symmetric (complex Hermitian) positive definite matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form:\n\n\\f[\n\\begin{array}{cl}\nA_j = U_j'U_j & \\: \\text{if uplo is upper, or}\\\\\nA_j = L_jL_j' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\n\\f$U_j\\f$ is an upper triangular matrix and \\f$L_j\\f$ is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of matrix A_j.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j to be factored. On exit, the upper or lower triangular factors.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful factorization of matrix A_j.\nIf info[j] = i > 0, the leading minor of order i of A_j is not positive definite.\nThe j-th factorization stopped at this point.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief POTRF_BATCHED computes the Cholesky factorization of a\nbatch of real symmetric (complex Hermitian) positive definite matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form:\n\n\\f[\n\\begin{array}{cl}\nA_l^{} = U_l'U_l^{} & \\: \\text{if uplo is upper, or}\\\\\nA_l^{} = L_l^{}L_l' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\n\\f$U_l\\f$ is an upper triangular matrix and \\f$L_l\\f$ is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of matrix A_l.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l to be factored. On exit, the upper or lower triangular factors.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful factorization of matrix A_l.\nIf info[l] = i > 0, the leading minor of order i of A_l is not positive definite.\nThe l-th factorization stopped at this point.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_spotrf_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5714,7 +5733,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTRF_STRIDED_BATCHED computes the Cholesky factorization of a\nbatch of real symmetric (complex Hermitian) positive definite matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_j\\f$ in the batch has the form:\n\n\\f[\n\\begin{array}{cl}\nA_j = U_j'U_j & \\: \\text{if uplo is upper, or}\\\\\nA_j = L_jL_j' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\n\\f$U_j\\f$ is an upper triangular matrix and \\f$L_j\\f$ is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of matrix A_j.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j to be factored. On exit, the upper or lower triangular factors.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful factorization of matrix A_j.\nIf info[j] = i > 0, the leading minor of order i of A_j is not positive definite.\nThe j-th factorization stopped at this point.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief POTRF_STRIDED_BATCHED computes the Cholesky factorization of a\nbatch of real symmetric (complex Hermitian) positive definite matrices.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization of matrix \\f$A_l\\f$ in the batch has the form:\n\n\\f[\n\\begin{array}{cl}\nA_l^{} = U_l'U_l^{} & \\: \\text{if uplo is upper, or}\\\\\nA_l^{} = L_l^{}L_l' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\n\\f$U_l\\f$ is an upper triangular matrix and \\f$L_l\\f$ is lower triangular.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of matrix A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l to be factored. On exit, the upper or lower triangular factors.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful factorization of matrix A_l.\nIf info[l] = i > 0, the leading minor of order i of A_l is not positive definite.\nThe l-th factorization stopped at this point.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_spotrf_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5767,7 +5786,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTRS solves a symmetric/hermitian system of n linear equations on n variables in its factorized form.\n\n\\details\nIt solves the system\n\n\\f[\nA X = B\n\\f]\n\nwhere A is a real symmetric (complex hermitian) positive definite matrix defined by its triangular factor\n\n\\f[\n\\begin{array}{cl}\nA = U'U & \\: \\text{if uplo is upper, or}\\\\\nA = LL' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nas returned by \\ref rocsolver_spotrf \"POTRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof the matrix B.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nThe factor L or U of the Cholesky factorization of A returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A.\n@param[in,out]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\\n\nOn entry, the right hand side matrix B.\nOn exit, the solution matrix X.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of B."]
+    #[doc = " @{\n\\brief POTRS solves a symmetric/hermitian system of n linear equations on n variables in its factorized form.\n\n\\details\nIt solves the system\n\n\\f[\nA X = B\n\\f]\n\nwhere A is a real symmetric (complex hermitian) positive definite matrix defined by its triangular factor\n\n\\f[\n\\begin{array}{cl}\nA = U'U & \\: \\text{if uplo is upper, or}\\\\\nA = LL' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nas returned by \\ref rocsolver_spotrf \"POTRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof the matrix B.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*n.\nThe factor L or U of the Cholesky factorization of A returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A.\n@param[inout]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\nOn entry, the right hand side matrix B.\nOn exit, the solution matrix X.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of B."]
     pub fn rocsolver_spotrs(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5820,7 +5839,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTRS_BATCHED solves a batch of symmetric/hermitian systems of n linear equations on n\nvariables in its factorized forms.\n\n\\details\nFor each instance j in the batch, it solves the system\n\n\\f[\nA_j X_j = B_j\n\\f]\n\nwhere \\f$A_j\\f$ is a real symmetric (complex hermitian) positive definite matrix defined by its\ntriangular factor\n\n\\f[\n\\begin{array}{cl}\nA_j = U_j'U_j & \\: \\text{if uplo is upper, or}\\\\\nA_j = L_jL_j' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nas returned by \\ref rocsolver_spotrf \"POTRF_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of all A_j matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_j.\n@param[in]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nThe factor L_j or U_j of the Cholesky factorization of A_j returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of matrices A_j.\n@param[in,out]\nB           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*nrhs.\\n\nOn entry, the right hand side matrices B_j.\nOn exit, the solution matrix X_j of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of matrices B_j.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of instances (systems) in the batch."]
+    #[doc = " @{\n\\brief POTRS_BATCHED solves a batch of symmetric/hermitian systems of n linear equations on n\nvariables in its factorized forms.\n\n\\details\nFor each instance l in the batch, it solves the system\n\n\\f[\nA_l X_l = B_l\n\\f]\n\nwhere \\f$A_l\\f$ is a real symmetric (complex hermitian) positive definite matrix defined by its\ntriangular factor\n\n\\f[\n\\begin{array}{cl}\nA_l^{} = U_l'U_l^{} & \\: \\text{if uplo is upper, or}\\\\\nA_l^{} = L_l^{}L_l' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nas returned by \\ref rocsolver_spotrf \"POTRF_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of all A_l matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_l.\n@param[in]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nThe factor L_l or U_l of the Cholesky factorization of A_l returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of matrices A_l.\n@param[inout]\nB           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*nrhs.\nOn entry, the right hand side matrices B_l.\nOn exit, the solution matrix X_l of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of matrices B_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of instances (systems) in the batch."]
     pub fn rocsolver_spotrs_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5877,7 +5896,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTRS_STRIDED_BATCHED solves a batch of symmetric/hermitian systems of n linear equations\non n variables in its factorized forms.\n\n\\details\nFor each instance j in the batch, it solves the system\n\n\\f[\nA_j X_j = B_j\n\\f]\n\nwhere \\f$A_j\\f$ is a real symmetric (complex hermitian) positive definite matrix defined by its\ntriangular factor\n\n\\f[\n\\begin{array}{cl}\nA_j = U_j'U_j & \\: \\text{if uplo is upper, or}\\\\\nA_j = L_jL_j' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nas returned by \\ref rocsolver_spotrf \"POTRF_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of all A_j matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_j.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nThe factor L_j or U_j of the Cholesky factorization of A_j returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in,out]\nB           pointer to type. Array on the GPU (size depends on the value of strideB).\\n\nOn entry, the right hand side matrices B_j.\nOn exit, the solution matrix X_j of each system in the batch.\n@param[in]\nldb         rocblas_int. 
ldb >= n.\\n\nThe leading dimension of matrices B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of instances (systems) in the batch."]
+    #[doc = " @{\n\\brief POTRS_STRIDED_BATCHED solves a batch of symmetric/hermitian systems of n linear equations\non n variables in its factorized forms.\n\n\\details\nFor each instance l in the batch, it solves the system\n\n\\f[\nA_l X_l = B_l\n\\f]\n\nwhere \\f$A_l\\f$ is a real symmetric (complex hermitian) positive definite matrix defined by its\ntriangular factor\n\n\\f[\n\\begin{array}{cl}\nA_l^{} = U_l'U_l^{} & \\: \\text{if uplo is upper, or}\\\\\nA_l^{} = L_l^{}L_l' & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nas returned by \\ref rocsolver_spotrf \"POTRF_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of all A_l matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_l.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nThe factor L_l or U_l of the Cholesky factorization of A_l returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[inout]\nB           pointer to type. Array on the GPU (size depends on the value of strideB).\nOn entry, the right hand side matrices B_l.\nOn exit, the solution matrix X_l of each system in the batch.\n@param[in]\nldb         rocblas_int. 
ldb >= n.\nThe leading dimension of matrices B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of instances (systems) in the batch."]
     pub fn rocsolver_spotrs_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5942,7 +5961,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POSV solves a symmetric/hermitian system of n linear equations on n variables.\n\n\\details\nIt solves the system\n\n\\f[\nA X = B\n\\f]\n\nwhere A is a real symmetric (complex hermitian) positive definite matrix. Matrix A is first\nfactorized as \\f$A=LL'\\f$ or \\f$A=U'U\\f$, depending on the value of uplo, using \\ref rocsolver_spotrf \"POTRF\";\nthen, the solution is computed with \\ref rocsolver_spotrs \"POTRS\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof the matrix B.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric/hermitian matrix A.\nOn exit, if info = 0, the factor L or U of the Cholesky factorization of A returned by\n\\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A.\n@param[in,out]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\\n\nOn entry, the right hand side matrix B.\nOn exit, the solution matrix X.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of B.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, the leading minor of order i of A is not positive definite.\nThe solution could not be computed."]
+    #[doc = " @{\n\\brief POSV solves a symmetric/hermitian system of n linear equations on n variables.\n\n\\details\nIt solves the system\n\n\\f[\nA X = B\n\\f]\n\nwhere A is a real symmetric (complex hermitian) positive definite matrix. Matrix A is first\nfactorized as \\f$A=LL'\\f$ or \\f$A=U'U\\f$, depending on the value of uplo, using \\ref rocsolver_spotrf \"POTRF\";\nthen, the solution is computed with \\ref rocsolver_spotrs \"POTRS\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof the matrix B.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the symmetric/hermitian matrix A.\nOn exit, if info = 0, the factor L or U of the Cholesky factorization of A returned by\n\\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A.\n@param[inout]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\nOn entry, the right hand side matrix B.\nOn exit, the solution matrix X.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of B.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, the leading minor of order i of A is not positive definite.\nThe solution could not be computed."]
     pub fn rocsolver_sposv(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -5999,7 +6018,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POSV_BATCHED solves a batch of symmetric/hermitian systems of n linear equations on n\nvariables.\n\n\\details\nFor each instance j in the batch, it solves the system\n\n\\f[\nA_j X_j = B_j\n\\f]\n\nwhere \\f$A_j\\f$ is a real symmetric (complex hermitian) positive definite matrix. Matrix \\f$A_j\\f$ is first\nfactorized as \\f$A_j=L_jL_j'\\f$ or \\f$A_j=U_j'U_j\\f$, depending on the value of uplo, using \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\";\nthen, the solution is computed with \\ref rocsolver_spotrs_batched \"POTRS_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of all A_j matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_j.\n@param[in]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric/hermitian matrices A_j.\nOn exit, if info[j] = 0, the factor L_j or U_j of the Cholesky factorization of A_j returned by\n\\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of matrices A_j.\n@param[in,out]\nB           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*nrhs.\\n\nOn entry, the right hand side matrices B_j.\nOn exit, the solution matrix X_j of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of matrices B_j.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit.\nIf info[j] = i > 0, the leading minor of order i of A_j is not positive definite.\nThe j-th solution could not be computed.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of instances (systems) in the batch."]
+    #[doc = " @{\n\\brief POSV_BATCHED solves a batch of symmetric/hermitian systems of n linear equations on n\nvariables.\n\n\\details\nFor each instance l in the batch, it solves the system\n\n\\f[\nA_l X_l = B_l\n\\f]\n\nwhere \\f$A_l\\f$ is a real symmetric (complex hermitian) positive definite matrix. Matrix \\f$A_l\\f$ is first\nfactorized as \\f$A_l^{}=L_l^{}L_l'\\f$ or \\f$A_l^{}=U_l'U_l^{}\\f$, depending on the value of uplo, using \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\";\nthen, the solution is computed with \\ref rocsolver_spotrs_batched \"POTRS_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of all A_l matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the symmetric/hermitian matrices A_l.\nOn exit, if info[l] = 0, the factor L_l or U_l of the Cholesky factorization of A_l returned by\n\\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of matrices A_l.\n@param[inout]\nB           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*nrhs.\nOn entry, the right hand side matrices B_l.\nOn exit, the solution matrix X_l of each system in the batch.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of matrices B_l.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit.\nIf info[l] = i > 0, the leading minor of order i of A_l is not positive definite.\nThe l-th solution could not be computed.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of instances (systems) in the batch."]
     pub fn rocsolver_sposv_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6060,7 +6079,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POSV_STRIDED_BATCHED solves a batch of symmetric/hermitian systems of n linear equations\non n variables.\n\n\\details\nFor each instance j in the batch, it solves the system\n\n\\f[\nA_j X_j = B_j\n\\f]\n\nwhere \\f$A_j\\f$ is a real symmetric (complex hermitian) positive definite matrix. Matrix \\f$A_j\\f$ is first\nfactorized as \\f$A_j=L_jL_j'\\f$ or \\f$A_j=U_j'U_j\\f$, depending on the value of uplo, using \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\";\nthen, the solution is computed with \\ref rocsolver_spotrs_strided_batched \"POTRS_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe order of the system, i.e. the number of columns and rows of all A_j matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_j.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the symmetric/hermitian matrices A_j.\nOn exit, if info[j] = 0, the factor L_j or U_j of the Cholesky factorization of A_j returned by\n\\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in,out]\nB           pointer to type. Array on the GPU (size depends on the value of strideB).\\n\nOn entry, the right hand side matrices B_j.\nOn exit, the solution matrix X_j of each system in the batch.\n@param[in]\nldb         rocblas_int. 
ldb >= n.\\n\nThe leading dimension of matrices B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit.\nIf info[j] = i > 0, the leading minor of order i of A_j is not positive definite.\nThe j-th solution could not be computed.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of instances (systems) in the batch."]
+    #[doc = " @{\n\\brief POSV_STRIDED_BATCHED solves a batch of symmetric/hermitian systems of n linear equations\non n variables.\n\n\\details\nFor each instance l in the batch, it solves the system\n\n\\f[\nA_l X_l = B_l\n\\f]\n\nwhere \\f$A_l\\f$ is a real symmetric (complex hermitian) positive definite matrix. Matrix \\f$A_l\\f$ is first\nfactorized as \\f$A_l^{}=L_l^{}L_l'\\f$ or \\f$A_l^{}=U_l'U_l^{}\\f$, depending on the value of uplo, using \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\";\nthen, the solution is computed with \\ref rocsolver_spotrs_strided_batched \"POTRS_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe order of the system, i.e. the number of columns and rows of all A_l matrices.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns\nof all the matrices B_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the symmetric/hermitian matrices A_l.\nOn exit, if info[l] = 0, the factor L_l or U_l of the Cholesky factorization of A_l returned by\n\\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[inout]\nB           pointer to type. Array on the GPU (size depends on the value of strideB).\nOn entry, the right hand side matrices B_l.\nOn exit, the solution matrix X_l of each system in the batch.\n@param[in]\nldb         rocblas_int. 
ldb >= n.\nThe leading dimension of matrices B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit.\nIf info[l] = i > 0, the leading minor of order i of A_l is not positive definite.\nThe l-th solution could not be computed.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of instances (systems) in the batch."]
     pub fn rocsolver_sposv_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6129,7 +6148,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTRI inverts a symmetric/hermitian positive definite matrix A.\n\n\\details\nThe inverse of matrix \\f$A\\f$ is computed as\n\n\\f[\n\\begin{array}{cl}\nA^{-1} = U^{-1} {U^{-1}}' & \\: \\text{if uplo is upper, or}\\\\\nA^{-1} = {L^{-1}}' L^{-1} & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nwhere \\f$U\\f$ or \\f$L\\f$ is the triangular factor of the Cholesky factorization of \\f$A\\f$ returned by\n\\ref rocsolver_spotrf \"POTRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the factor L or U of the Cholesky factorization of A returned by\n\\ref rocsolver_spotrf \"POTRF\".\nOn exit, the inverse of A if info = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit for inversion of A.\nIf info = i > 0, A is singular. L[i,i] or U[i,i] is zero."]
+    #[doc = " @{\n\\brief POTRI inverts a symmetric/hermitian positive definite matrix A.\n\n\\details\nThe inverse of matrix \\f$A\\f$ is computed as\n\n\\f[\n\\begin{array}{cl}\nA^{-1} = U^{-1} {U^{-1}}' & \\: \\text{if uplo is upper, or}\\\\\nA^{-1} = {L^{-1}}' L^{-1} & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nwhere \\f$U\\f$ or \\f$L\\f$ is the triangular factor of the Cholesky factorization of \\f$A\\f$ returned by\n\\ref rocsolver_spotrf \"POTRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the factor L or U of the Cholesky factorization of A returned by\n\\ref rocsolver_spotrf \"POTRF\".\nOn exit, the inverse of A if info = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit for inversion of A.\nIf info = i > 0, A is singular. L[i,i] or U[i,i] is zero."]
     pub fn rocsolver_spotri(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6174,7 +6193,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTRI_BATCHED inverts a batch of symmetric/hermitian positive definite matrices \\f$A_j\\f$.\n\n\\details\nThe inverse of matrix \\f$A_j\\f$ in the batch is computed as\n\n\\f[\n\\begin{array}{cl}\nA_j^{-1} = U_j^{-1} {U_j^{-1}}' & \\: \\text{if uplo is upper, or}\\\\\nA_j^{-1} = {L_j^{-1}}' L_j^{-1} & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nwhere \\f$U_j\\f$ or \\f$L_j\\f$ is the triangular factor of the Cholesky factorization of \\f$A_j\\f$ returned by\n\\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of matrix A_j.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the factor L_j or U_j of the Cholesky factorization of A_j returned by\n\\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\nOn exit, the inverses of A_j if info[j] = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, A_j is singular. L_j[i,i] or U_j[i,i] is zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief POTRI_BATCHED inverts a batch of symmetric/hermitian positive definite matrices \\f$A_l\\f$.\n\n\\details\nThe inverse of matrix \\f$A_l\\f$ in the batch is computed as\n\n\\f[\n\\begin{array}{cl}\nA_l^{-1} = U_l^{-1} {U_l^{-1}}' & \\: \\text{if uplo is upper, or}\\\\\nA_l^{-1} = {L_l^{-1}}' L_l^{-1} & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nwhere \\f$U_l\\f$ or \\f$L_l\\f$ is the triangular factor of the Cholesky factorization of \\f$A_l\\f$ returned by\n\\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of matrix A_l.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the factor L_l or U_l of the Cholesky factorization of A_l returned by\n\\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\nOn exit, the inverses of A_l if info[l] = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, A_l is singular. L_l[i,i] or U_l[i,i] is zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_spotri_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6223,7 +6242,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief POTRI_STRIDED_BATCHED inverts a batch of symmetric/hermitian positive definite matrices \\f$A_j\\f$.\n\n\\details\nThe inverse of matrix \\f$A_j\\f$ in the batch is computed as\n\n\\f[\n\\begin{array}{cl}\nA_j^{-1} = U_j^{-1} {U_j^{-1}}' & \\: \\text{if uplo is upper, or}\\\\\nA_j^{-1} = {L_j^{-1}}' L_j^{-1} & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nwhere \\f$U_j\\f$ or \\f$L_j\\f$ is the triangular factor of the Cholesky factorization of \\f$A_j\\f$ returned by\n\\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of matrix A_j.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the factor L_j or U_j of the Cholesky factorization of A_j returned by\n\\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\nOn exit, the inverses of A_j if info[j] = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, A_j is singular. L_j[i,i] or U_j[i,i] is zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief POTRI_STRIDED_BATCHED inverts a batch of symmetric/hermitian positive definite matrices \\f$A_l\\f$.\n\n\\details\nThe inverse of matrix \\f$A_l\\f$ in the batch is computed as\n\n\\f[\n\\begin{array}{cl}\nA_l^{-1} = U_l^{-1} {U_l^{-1}}' & \\: \\text{if uplo is upper, or}\\\\\nA_l^{-1} = {L_l^{-1}}' L_l^{-1} & \\: \\text{if uplo is lower.}\n\\end{array}\n\\f]\n\nwhere \\f$U_l\\f$ or \\f$L_l\\f$ is the triangular factor of the Cholesky factorization of \\f$A_l\\f$ returned by\n\\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the factorization is upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of matrix A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the factor L_l or U_l of the Cholesky factorization of A_l returned by\n\\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\nOn exit, the inverses of A_l if info[l] = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, A_l is singular. L_l[i,i] or U_l[i,i] is zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_spotri_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -6276,7 +6295,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESVD computes the singular values and optionally the singular\nvectors of a general m-by-n matrix A (Singular Value Decomposition).\n\n\\details\nThe SVD of matrix A is given by:\n\n\\f[\nA = U  S  V'\n\\f]\n\nwhere the m-by-n matrix S is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of A. U and V are orthogonal\n(unitary) matrices. The first min(m,n) columns of U and V are the left and\nright singular vectors of A, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of V'.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix U (or V') is computed,\n- rocblas_svect_singular: only the singular vectors (first min(m,n)\ncolumns of U or rows of V') are computed,\n- rocblas_svect_overwrite: the first\ncolumns (or rows) of A are overwritten with the singular vectors, or\n- rocblas_svect_none: no columns (or rows) of U (or V') are computed, i.e.\nno singular vectors.\n\nleft_svect and right_svect cannot both be set to overwrite. When neither is\nset to overwrite, the contents of A are destroyed by the time the function\nreturns.\n\n\\note\nWhen m >> n (or n >> m) the algorithm could be sped up by compressing\nthe matrix A via a QR (or LQ) factorization, and working with the triangular\nfactor afterwards (thin-SVD). If the singular vectors are also requested, its\ncomputation could be sped up as well via executing some intermediate\noperations out-of-place, and relying more on matrix multiplications (GEMMs);\nthis will require, however, a larger memory workspace. The parameter fast_alg\ncontrols whether the fast algorithm is executed or not. 
For more details, see\nthe \"Tuning rocSOLVER performance\" and \"Memory model\" sections of the documentation.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\\n\nSpecifies how the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\\n\nSpecifies how the right singular vectors are computed.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A.\nOn exit, if left_svect (or right_svect) is equal to overwrite,\nthe first columns (or rows) contain the left (or right) singular vectors;\notherwise, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nThe leading dimension of A.\n@param[out]\nS           pointer to real type. Array on the GPU of dimension min(m,n). \\n\nThe singular values of A in decreasing order.\n@param[out]\nU           pointer to type. Array on the GPU of dimension ldu*min(m,n) if\nleft_svect is set to singular, or ldu*m when left_svect is equal to all.\\n\nThe matrix of left singular vectors stored as columns. Not\nreferenced if left_svect is set to overwrite or none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect is all or singular; ldu >= 1 otherwise.\\n\nThe leading dimension of U.\n@param[out]\nV           pointer to type. Array on the GPU of dimension ldv*n. \\n\nThe matrix of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to overwrite or none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is all; ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\\n\nThe leading dimension of V.\n@param[out]\nE           pointer to real type. 
Array on the GPU of dimension min(m,n)-1.\\n\nThis array is used to work internally with the bidiagonal matrix\nB associated with A (using \\ref rocsolver_sbdsqr \"BDSQR\"). On exit, if info > 0, it contains the\nunconverged off-diagonal elements of B (or properly speaking, a bidiagonal\nmatrix orthogonally equivalent to B). The diagonal elements of this matrix\nare in S; those that converged correspond to a subset of the singular values\nof A (not necessarily ordered).\n@param[in]\nfast_alg    #rocblas_workmode. \\n\nIf set to rocblas_outofplace, the function will execute the\nfast thin-SVD version of the algorithm when possible.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, \\ref rocsolver_sbdsqr \"BDSQR\" did not converge. i elements of E did not converge to zero."]
+    #[doc = " @{\n\\brief GESVD computes the singular values and optionally the singular\nvectors of a general m-by-n matrix A (Singular Value Decomposition).\n\n\\details\nThe SVD of matrix A is given by:\n\n\\f[\nA = U  S  V'\n\\f]\n\nwhere the m-by-n matrix S is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of A. U and V are orthogonal\n(unitary) matrices. The first min(m,n) columns of U and V are the left and\nright singular vectors of A, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of V'.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix U (or V') is computed,\n- rocblas_svect_singular: only the singular vectors (first min(m,n)\ncolumns of U or rows of V') are computed,\n- rocblas_svect_overwrite: the first\ncolumns (or rows) of A are overwritten with the singular vectors, or\n- rocblas_svect_none: no columns (or rows) of U (or V') are computed, i.e.\nno singular vectors.\n\nleft_svect and right_svect cannot both be set to overwrite. When neither is\nset to overwrite, the contents of A are destroyed by the time the function\nreturns.\n\n\\note\nWhen m >> n (or n >> m) the algorithm could be sped up by compressing\nthe matrix A via a QR (or LQ) factorization, and working with the triangular\nfactor afterwards (thin-SVD). If the singular vectors are also requested, its\ncomputation could be sped up as well via executing some intermediate\noperations out-of-place, and relying more on matrix multiplications (GEMMs);\nthis will require, however, a larger memory workspace. The parameter fast_alg\ncontrols whether the fast algorithm is executed or not. 
For more details, see\nthe \"Tuning rocSOLVER performance\" and \"Memory model\" sections of the documentation.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\nSpecifies how the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\nSpecifies how the right singular vectors are computed.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A.\nOn exit, if left_svect (or right_svect) is equal to overwrite,\nthe first columns (or rows) contain the left (or right) singular vectors;\notherwise, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\nThe leading dimension of A.\n@param[out]\nS           pointer to real type. Array on the GPU of dimension min(m,n).\nThe singular values of A in decreasing order.\n@param[out]\nU           pointer to type. Array on the GPU of dimension ldu*min(m,n) if\nleft_svect is set to singular, or ldu*m when left_svect is equal to all.\nThe matrix of left singular vectors stored as columns. Not\nreferenced if left_svect is set to overwrite or none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect is all or singular; ldu >= 1 otherwise.\nThe leading dimension of U.\n@param[out]\nV           pointer to type. Array on the GPU of dimension ldv*n.\nThe matrix of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to overwrite or none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is all; ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\nThe leading dimension of V.\n@param[out]\nE           pointer to real type. 
Array on the GPU of dimension min(m,n)-1.\nThis array is used to work internally with the bidiagonal matrix\nB associated with A (using \\ref rocsolver_sbdsqr \"BDSQR\"). On exit, if info > 0, it contains the\nunconverged off-diagonal elements of B (or properly speaking, a bidiagonal\nmatrix orthogonally equivalent to B). The diagonal elements of this matrix\nare in S; those that converged correspond to a subset of the singular values\nof A (not necessarily ordered).\n@param[in]\nfast_alg    #rocblas_workmode.\nIf set to rocblas_outofplace, the function will execute the\nfast thin-SVD version of the algorithm when possible.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, \\ref rocsolver_sbdsqr \"BDSQR\" did not converge. i elements of E did not converge to zero."]
     pub fn rocsolver_sgesvd(
         handle: rocblas_handle,
         left_svect: rocblas_svect,
@@ -6357,7 +6376,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESVD_BATCHED computes the singular values and optionally the\nsingular vectors of a batch of general m-by-n matrix A (Singular Value\nDecomposition).\n\n\\details\nThe SVD of matrix A_j in the batch is given by:\n\n\\f[\nA_j = U_j  S_j  V_j'\n\\f]\n\nwhere the m-by-n matrix \\f$S_j\\f$ is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_j\\f$. \\f$U_j\\f$ and \\f$V_j\\f$ are\northogonal (unitary) matrices. The first min(m,n) columns of \\f$U_j\\f$ and \\f$V_j\\f$ are\nthe left and right singular vectors of \\f$A_j\\f$, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_j'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix \\f$U_j\\f$ (or \\f$V_j'\\f$) is computed,\n- rocblas_svect_singular: only the singular vectors (first min(m,n)\ncolumns of \\f$U_j\\f$ or rows of \\f$V_j'\\f$) are computed,\n- rocblas_svect_overwrite: the\nfirst columns (or rows) of \\f$A_j\\f$ are overwritten with the singular vectors, or\n- rocblas_svect_none: no columns (or rows) of \\f$U_j\\f$ (or \\f$V_j'\\f$) are computed,\ni.e. no singular vectors.\n\nleft_svect and right_svect cannot both be set to overwrite. When neither is\nset to overwrite, the contents of \\f$A_j\\f$ are destroyed by the time the function\nreturns.\n\n\\note\nWhen m >> n (or n >> m) the algorithm could be sped up by compressing\nthe matrix \\f$A_j\\f$ via a QR (or LQ) factorization, and working with the\ntriangular factor afterwards (thin-SVD). 
If the singular vectors are also\nrequested, its computation could be sped up as well via executing some\nintermediate operations out-of-place, and relying more on matrix\nmultiplications (GEMMs); this will require, however, a larger memory\nworkspace. The parameter fast_alg controls whether the fast algorithm is\nexecuted or not. For more details, see the \"Tuning rocSOLVER performance\"\nand \"Memory model\" sections of the documentation.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\\n\nSpecifies how the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\\n\nSpecifies how the right singular vectors are computed.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on\nthe GPU of dimension lda*n.\\n\nOn entry, the matrices A_j.\nOn exit, if left_svect (or right_svect) is equal to overwrite,\nthe first columns (or rows) of A_j contain the left (or right)\ncorresponding singular vectors; otherwise, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nThe leading dimension of A_j.\n@param[out]\nS           pointer to real type. Array on the GPU (the size depends on the value of strideS).\\n\nThe singular values of A_j in decreasing order.\n@param[in]\nstrideS     rocblas_stride.\\n\nStride from the start of one vector S_j to the next one S_(j+1).\nThere is no restriction for the value of strideS.\nNormal use case is strideS >= min(m,n).\n@param[out]\nU           pointer to type. Array on the GPU (the side depends on the value of strideU). \\n\nThe matrices U_j of left singular vectors stored as columns.\nNot referenced if left_svect is set to overwrite or none.\n@param[in]\nldu         rocblas_int. 
ldu >= m if left_svect is all or singular; ldu >= 1 otherwise.\\n\nThe leading dimension of U_j.\n@param[in]\nstrideU     rocblas_stride.\\n\nStride from the start of one matrix U_j to the next one U_(j+1).\nThere is no restriction for the value of strideU.\nNormal use case is strideU >= ldu*min(m,n) if left_svect is set to singular,\nor strideU >= ldu*m when left_svect is equal to all.\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV). \\n\nThe matrices V_j of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to overwrite or none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is all; ldv >= min(m,n) if\nright_svect is set to singular; or ldv >= 1 otherwise.\\n\nThe leading dimension of V.\n@param[in]\nstrideV     rocblas_stride.\\n\nStride from the start of one matrix V_j to the next one V_(j+1).\nThere is no restriction for the value of strideV.\nNormal use case is strideV >= ldv*n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the bidiagonal matrix B_j associated with A_j (using \\ref rocsolver_sbdsqr \"BDSQR\").\nOn exit, if info[j] > 0, E_j contains the unconverged off-diagonal elements of B_j (or properly speaking,\na bidiagonal matrix orthogonally equivalent to B_j). The diagonal elements of this matrix are in S_j;\nthose that converged correspond to a subset of the singular values of A_j (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= min(m,n)-1.\n@param[in]\nfast_alg    #rocblas_workmode. 
\\n\nIf set to rocblas_outofplace, the function will execute the fast thin-SVD version\nof the algorithm when possible.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info[j] = 0, successful exit.\nIf info[j] = i > 0, \\ref rocsolver_sbdsqr \"BDSQR\" did not converge. i elements of E_j did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GESVD_BATCHED computes the singular values and optionally the\nsingular vectors of a batch of general m-by-n matrices A_l (Singular Value\nDecomposition).\n\n\\details\nThe SVD of matrix A_l in the batch is given by:\n\n\\f[\nA_l^{} = U_l^{}  S_l^{}  V_l'\n\\f]\n\nwhere the m-by-n matrix \\f$S_l\\f$ is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_l\\f$. \\f$U_l\\f$ and \\f$V_l\\f$ are\northogonal (unitary) matrices. The first min(m,n) columns of \\f$U_l\\f$ and \\f$V_l\\f$ are\nthe left and right singular vectors of \\f$A_l\\f$, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_l'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix \\f$U_l\\f$ (or \\f$V_l'\\f$) is computed,\n- rocblas_svect_singular: only the singular vectors (first min(m,n)\ncolumns of \\f$U_l\\f$ or rows of \\f$V_l'\\f$) are computed,\n- rocblas_svect_overwrite: the\nfirst columns (or rows) of \\f$A_l\\f$ are overwritten with the singular vectors, or\n- rocblas_svect_none: no columns (or rows) of \\f$U_l\\f$ (or \\f$V_l'\\f$) are computed,\ni.e. no singular vectors.\n\nleft_svect and right_svect cannot both be set to overwrite. When neither is\nset to overwrite, the contents of \\f$A_l\\f$ are destroyed by the time the function\nreturns.\n\n\\note\nWhen m >> n (or n >> m) the algorithm could be sped up by compressing\nthe matrix \\f$A_l\\f$ via a QR (or LQ) factorization, and working with the\ntriangular factor afterwards (thin-SVD). 
If the singular vectors are also\nrequested, its computation could be sped up as well via executing some\nintermediate operations out-of-place, and relying more on matrix\nmultiplications (GEMMs); this will require, however, a larger memory\nworkspace. The parameter fast_alg controls whether the fast algorithm is\nexecuted or not. For more details, see the \"Tuning rocSOLVER performance\"\nand \"Memory model\" sections of the documentation.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\nSpecifies how the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\nSpecifies how the right singular vectors are computed.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on\nthe GPU of dimension lda*n.\nOn entry, the matrices A_l.\nOn exit, if left_svect (or right_svect) is equal to overwrite,\nthe first columns (or rows) of A_l contain the left (or right)\ncorresponding singular vectors; otherwise, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\nThe leading dimension of A_l.\n@param[out]\nS           pointer to real type. Array on the GPU (the size depends on the value of strideS).\nThe singular values of A_l in decreasing order.\n@param[in]\nstrideS     rocblas_stride.\nStride from the start of one vector S_l to the next one S_(l+1).\nThere is no restriction for the value of strideS.\nNormal use case is strideS >= min(m,n).\n@param[out]\nU           pointer to type. Array on the GPU (the size depends on the value of strideU).\nThe matrices U_l of left singular vectors stored as columns.\nNot referenced if left_svect is set to overwrite or none.\n@param[in]\nldu         rocblas_int. 
ldu >= m if left_svect is all or singular; ldu >= 1 otherwise.\nThe leading dimension of U_l.\n@param[in]\nstrideU     rocblas_stride.\nStride from the start of one matrix U_l to the next one U_(l+1).\nThere is no restriction for the value of strideU.\nNormal use case is strideU >= ldu*min(m,n) if left_svect is set to singular,\nor strideU >= ldu*m when left_svect is equal to all.\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV).\nThe matrices V_l of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to overwrite or none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is all; ldv >= min(m,n) if\nright_svect is set to singular; or ldv >= 1 otherwise.\nThe leading dimension of V_l.\n@param[in]\nstrideV     rocblas_stride.\nStride from the start of one matrix V_l to the next one V_(l+1).\nThere is no restriction for the value of strideV.\nNormal use case is strideV >= ldv*n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the bidiagonal matrix B_l associated with A_l (using \\ref rocsolver_sbdsqr \"BDSQR\").\nOn exit, if info[l] > 0, E_l contains the unconverged off-diagonal elements of B_l (or properly speaking,\na bidiagonal matrix orthogonally equivalent to B_l). The diagonal elements of this matrix are in S_l;\nthose that converged correspond to a subset of the singular values of A_l (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. 
Normal use case is strideE >= min(m,n)-1.\n@param[in]\nfast_alg    #rocblas_workmode.\nIf set to rocblas_outofplace, the function will execute the fast thin-SVD version\nof the algorithm when possible.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info[l] = 0, successful exit.\nIf info[l] = i > 0, \\ref rocsolver_sbdsqr \"BDSQR\" did not converge. i elements of E_l did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgesvd_batched(
         handle: rocblas_handle,
         left_svect: rocblas_svect,
@@ -6458,7 +6477,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESVD_STRIDED_BATCHED computes the singular values and optionally the\nsingular vectors of a batch of general m-by-n matrix A (Singular Value\nDecomposition).\n\n\\details\nThe SVD of matrix A_j in the batch is given by:\n\n\\f[\nA_j = U_j  S_j  V_j'\n\\f]\n\nwhere the m-by-n matrix \\f$S_j\\f$ is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_j\\f$. \\f$U_j\\f$ and \\f$V_j\\f$ are\northogonal (unitary) matrices. The first min(m,n) columns of \\f$U_j\\f$ and \\f$V_j\\f$ are\nthe left and right singular vectors of \\f$A_j\\f$, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_j'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix \\f$U_j\\f$ (or \\f$V_j'\\f$) is computed,\n- rocblas_svect_singular: only the singular vectors (first min(m,n)\ncolumns of \\f$U_j\\f$ or rows of \\f$V_j'\\f$) are computed,\n- rocblas_svect_overwrite: the\nfirst columns (or rows) of \\f$A_j\\f$ are overwritten with the singular vectors, or\n- rocblas_svect_none: no columns (or rows) of \\f$U_j\\f$ (or \\f$V_j'\\f$) are computed,\ni.e. no singular vectors.\n\nleft_svect and right_svect cannot both be set to overwrite. When neither is\nset to overwrite, the contents of \\f$A_j\\f$ are destroyed by the time the function\nreturns.\n\n\\note\nWhen m >> n (or n >> m) the algorithm could be sped up by compressing\nthe matrix \\f$A_j\\f$ via a QR (or LQ) factorization, and working with the\ntriangular factor afterwards (thin-SVD). 
If the singular vectors are also\nrequested, its computation could be sped up as well via executing some\nintermediate operations out-of-place, and relying more on matrix\nmultiplications (GEMMs); this will require, however, a larger memory\nworkspace. The parameter fast_alg controls whether the fast algorithm is\nexecuted or not. For more details, see the \"Tuning rocSOLVER performance\"\nand \"Memory model\" sections of the documentation.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\\n\nSpecifies how the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\\n\nSpecifies how the right singular vectors are computed.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, if left_svect (or right_svect) is equal to\noverwrite, the first columns (or rows) of A_j contain the left (or right)\ncorresponding singular vectors; otherwise, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nThe leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA.\nNormal use case is strideA >= lda*n.\n@param[out]\nS           pointer to real type. Array on the GPU (the size depends on the value of strideS).\\n\nThe singular values of A_j in decreasing order.\n@param[in]\nstrideS     rocblas_stride.\\n\nStride from the start of one vector S_j to the next one S_(j+1).\nThere is no restriction for the value of strideS.\nNormal use case is strideS >= min(m,n).\n@param[out]\nU           pointer to type. Array on the GPU (the side depends on the value of strideU). 
\\n\nThe matrices U_j of left singular vectors stored as columns.\nNot referenced if left_svect is set to overwrite or none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect is all or singular; ldu >= 1 otherwise.\\n\nThe leading dimension of U_j.\n@param[in]\nstrideU     rocblas_stride.\\n\nStride from the start of one matrix U_j to the next one U_(j+1).\nThere is no restriction for the value of strideU.\nNormal use case is strideU >= ldu*min(m,n) if left_svect is set to singular,\nor strideU >= ldu*m when left_svect is equal to all.\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV). \\n\nThe matrices V_j of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to overwrite or none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is all; ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\\n\nThe leading dimension of V.\n@param[in]\nstrideV     rocblas_stride.\\n\nStride from the start of one matrix V_j to the next one V_(j+1).\nThere is no restriction for the value of strideV.\nNormal use case is strideV >= ldv*n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the bidiagonal matrix B_j associated with A_j (using \\ref rocsolver_sbdsqr \"BDSQR\").\nOn exit, if info > 0, E_j contains the unconverged off-diagonal elements of B_j (or properly speaking,\na bidiagonal matrix orthogonally equivalent to B_j). The diagonal elements of this matrix are in S_j;\nthose that converged correspond to a subset of the singular values of A_j (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE.\nNormal use case is strideE >= min(m,n)-1.\n@param[in]\nfast_alg    #rocblas_workmode. 
\\n\nIf set to rocblas_outofplace, the function will execute the fast thin-SVD version\nof the algorithm when possible.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info[j] = 0, successful exit.\nIf info[j] = i > 0, BDSQR did not converge. i elements of E_j did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GESVD_STRIDED_BATCHED computes the singular values and optionally the\nsingular vectors of a batch of general m-by-n matrices A_l (Singular Value\nDecomposition).\n\n\\details\nThe SVD of matrix A_l in the batch is given by:\n\n\\f[\nA_l^{} = U_l^{}  S_l^{}  V_l'\n\\f]\n\nwhere the m-by-n matrix \\f$S_l\\f$ is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_l\\f$. \\f$U_l\\f$ and \\f$V_l\\f$ are\northogonal (unitary) matrices. The first min(m,n) columns of \\f$U_l\\f$ and \\f$V_l\\f$ are\nthe left and right singular vectors of \\f$A_l\\f$, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_l'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix \\f$U_l\\f$ (or \\f$V_l'\\f$) is computed,\n- rocblas_svect_singular: only the singular vectors (first min(m,n)\ncolumns of \\f$U_l\\f$ or rows of \\f$V_l'\\f$) are computed,\n- rocblas_svect_overwrite: the\nfirst columns (or rows) of \\f$A_l\\f$ are overwritten with the singular vectors, or\n- rocblas_svect_none: no columns (or rows) of \\f$U_l\\f$ (or \\f$V_l'\\f$) are computed,\ni.e. no singular vectors.\n\nleft_svect and right_svect cannot both be set to overwrite. When neither is\nset to overwrite, the contents of \\f$A_l\\f$ are destroyed by the time the function\nreturns.\n\n\\note\nWhen m >> n (or n >> m) the algorithm could be sped up by compressing\nthe matrix \\f$A_l\\f$ via a QR (or LQ) factorization, and working with the\ntriangular factor afterwards (thin-SVD). 
If the singular vectors are also\nrequested, its computation could be sped up as well via executing some\nintermediate operations out-of-place, and relying more on matrix\nmultiplications (GEMMs); this will require, however, a larger memory\nworkspace. The parameter fast_alg controls whether the fast algorithm is\nexecuted or not. For more details, see the \"Tuning rocSOLVER performance\"\nand \"Memory model\" sections of the documentation.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\nSpecifies how the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\nSpecifies how the right singular vectors are computed.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, if left_svect (or right_svect) is equal to\noverwrite, the first columns (or rows) of A_l contain the left (or right)\ncorresponding singular vectors; otherwise, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\nThe leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA.\nNormal use case is strideA >= lda*n.\n@param[out]\nS           pointer to real type. Array on the GPU (the size depends on the value of strideS).\nThe singular values of A_l in decreasing order.\n@param[in]\nstrideS     rocblas_stride.\nStride from the start of one vector S_l to the next one S_(l+1).\nThere is no restriction for the value of strideS.\nNormal use case is strideS >= min(m,n).\n@param[out]\nU           pointer to type. 
Array on the GPU (the size depends on the value of strideU).\nThe matrices U_l of left singular vectors stored as columns.\nNot referenced if left_svect is set to overwrite or none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect is all or singular; ldu >= 1 otherwise.\nThe leading dimension of U_l.\n@param[in]\nstrideU     rocblas_stride.\nStride from the start of one matrix U_l to the next one U_(l+1).\nThere is no restriction for the value of strideU.\nNormal use case is strideU >= ldu*min(m,n) if left_svect is set to singular,\nor strideU >= ldu*m when left_svect is equal to all.\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV).\nThe matrices V_l of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to overwrite or none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is all; ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\nThe leading dimension of V_l.\n@param[in]\nstrideV     rocblas_stride.\nStride from the start of one matrix V_l to the next one V_(l+1).\nThere is no restriction for the value of strideV.\nNormal use case is strideV >= ldv*n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the bidiagonal matrix B_l associated with A_l (using \\ref rocsolver_sbdsqr \"BDSQR\").\nOn exit, if info[l] > 0, E_l contains the unconverged off-diagonal elements of B_l (or properly speaking,\na bidiagonal matrix orthogonally equivalent to B_l). 
The diagonal elements of this matrix are in S_l;\nthose that converged correspond to a subset of the singular values of A_l (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE.\nNormal use case is strideE >= min(m,n)-1.\n@param[in]\nfast_alg    #rocblas_workmode.\nIf set to rocblas_outofplace, the function will execute the fast thin-SVD version\nof the algorithm when possible.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info[l] = 0, successful exit.\nIf info[l] = i > 0, BDSQR did not converge. i elements of E_l did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgesvd_strided_batched(
         handle: rocblas_handle,
         left_svect: rocblas_svect,
@@ -6563,7 +6582,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESVDJ computes the singular values and optionally the singular\nvectors of a general m-by-n matrix A (Singular Value Decomposition).\n\n\\details\nThe SVD of matrix A is given by:\n\n\\f[\nA = U  S  V'\n\\f]\n\nwhere the m-by-n matrix S is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of A. U and V are orthogonal\n(unitary) matrices. The first min(m,n) columns of U and V are the left and\nright singular vectors of A, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of V'.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix U (or V') is computed,\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of U or rows of V') are computed, or\n- rocblas_svect_none: no columns (or rows) of U (or V') are computed, i.e.\nno singular vectors.\n\nThe singular values are computed by applying QR factorization to AV if m >= n\n(resp. LQ factorization to U'A if m < n), where V (resp. U) is found as the\neigenvectors of A'A (resp. AA') using the Jacobi eigenvalue algorithm.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\\n\nSpecifies how the left singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nright_svect #rocblas_svect.\\n\nSpecifies how the right singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of matrix A.\n@param[in]\nn           rocblas_int. 
n >= 0.\\n\nThe number of columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A.\nOn exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nThe leading dimension of A.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(A'A)\nis <= norm(A'A) * abstol [resp. off(AA') <= norm(AA') * abstol]. If abstol <= 0,\nthen the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of A'A (resp. AA') at the final\niteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to a rocblas_int on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm.\n@param[out]\nS           pointer to real type. Array on the GPU of dimension min(m,n). \\n\nThe singular values of A in decreasing order.\n@param[out]\nU           pointer to type. Array on the GPU of dimension ldu*min(m,n) if\nleft_svect is set to singular, or ldu*m when left_svect is equal to all.\\n\nThe matrix of left singular vectors stored as columns. Not\nreferenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect is set to all or singular; ldu >= 1 otherwise.\\n\nThe leading dimension of U.\n@param[out]\nV           pointer to type. Array on the GPU of dimension ldv*n. \\n\nThe matrix of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is set to all; ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\\n\nThe leading dimension of V.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit. 
If info = 1, the algorithm did not converge."]
+    #[doc = " @{\n\\brief GESVDJ computes the singular values and optionally the singular\nvectors of a general m-by-n matrix A (Singular Value Decomposition).\n\n\\details\nThe SVD of matrix A is given by:\n\n\\f[\nA = U  S  V'\n\\f]\n\nwhere the m-by-n matrix S is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of A. U and V are orthogonal\n(unitary) matrices. The first min(m,n) columns of U and V are the left and\nright singular vectors of A, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of V'.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix U (or V') is computed,\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of U or rows of V') are computed, or\n- rocblas_svect_none: no columns (or rows) of U (or V') are computed, i.e.\nno singular vectors.\n\nThe singular values are computed by applying QR factorization to AV if m >= n\n(resp. LQ factorization to U'A if m < n), where V (resp. U) is found as the\neigenvectors of A'A (resp. AA') using the Jacobi eigenvalue algorithm.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\nSpecifies how the left singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nright_svect #rocblas_svect.\nSpecifies how the right singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of matrix A.\n@param[in]\nn           rocblas_int. 
n >= 0.\nThe number of columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A.\nOn exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\nThe leading dimension of A.\n@param[in]\nabstol      real type.\nThe absolute tolerance. The algorithm is considered to have converged once off(A'A)\nis <= norm(A'A) * abstol [resp. off(AA') <= norm(AA') * abstol]. If abstol <= 0,\nthen the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type on the GPU.\nThe Frobenius norm of the off-diagonal elements of A'A (resp. AA') at the final\niteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to a rocblas_int on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm.\n@param[out]\nS           pointer to real type. Array on the GPU of dimension min(m,n).\nThe singular values of A in decreasing order.\n@param[out]\nU           pointer to type. Array on the GPU of dimension ldu*min(m,n) if\nleft_svect is set to singular, or ldu*m when left_svect is equal to all.\nThe matrix of left singular vectors stored as columns. Not\nreferenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect is set to all or singular; ldu >= 1 otherwise.\nThe leading dimension of U.\n@param[out]\nV           pointer to type. Array on the GPU of dimension ldv*n.\nThe matrix of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is set to all; ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\nThe leading dimension of V.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit. 
If info = 1, the algorithm did not converge."]
     pub fn rocsolver_sgesvdj(
         handle: rocblas_handle,
         left_svect: rocblas_svect,
@@ -6652,7 +6671,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESVDJ_BATCHED computes the singular values and optionally the\nsingular vectors of a batch of general m-by-n matrix A (Singular Value\nDecomposition).\n\n\\details\nThe SVD of matrix A_j in the batch is given by:\n\n\\f[\nA_j = U_j  S_j  V_j'\n\\f]\n\nwhere the m-by-n matrix \\f$S_j\\f$ is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_j\\f$. \\f$U_j\\f$ and \\f$V_j\\f$ are\northogonal (unitary) matrices. The first min(m,n) columns of \\f$U_j\\f$ and \\f$V_j\\f$ are\nthe left and right singular vectors of \\f$A_j\\f$, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_j'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix \\f$U_j\\f$ (or \\f$V_j'\\f$) is computed,\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of \\f$U_j\\f$ or rows of \\f$V_j'\\f$) are computed, or\n- rocblas_svect_none: no columns (or rows) of \\f$U_j\\f$ (or \\f$V_j'\\f$) are computed,\ni.e. no singular vectors.\n\nThe singular values are computed by applying QR factorization to \\f$A_jV_j\\f$ if m >= n\n(resp. LQ factorization to \\f$U_j'A_j\\f$ if m < n), where \\f$V_j\\f$ (resp. \\f$U_j\\f$) is\nfound as the eigenvectors of \\f$A_j'A_j\\f$ (resp. 
\\f$A_jA_j'\\f$) using the Jacobi\neigenvalue algorithm.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\\n\nSpecifies how the left singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nright_svect #rocblas_svect.\\n\nSpecifies how the right singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on\nthe GPU of dimension lda*n.\\n\nOn entry, the matrices A_j.\nOn exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nThe leading dimension of A_j.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(A_j'A_j)\nis <= norm(A_j'A_j) * abstol [resp. off(A_jA_j') <= norm(A_jA_j') * abstol]. If abstol <= 0,\nthen the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of A_j'A_j (resp. A_jA_j') at the final\niteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nS           pointer to real type. 
Array on the GPU (the size depends on the value of strideS).\\n\nThe singular values of A_j in decreasing order.\n@param[in]\nstrideS     rocblas_stride.\\n\nStride from the start of one vector S_j to the next one S_(j+1).\nThere is no restriction for the value of strideS.\nNormal use case is strideS >= min(m,n).\n@param[out]\nU           pointer to type. Array on the GPU (the side depends on the value of strideU). \\n\nThe matrices U_j of left singular vectors stored as columns.\nNot referenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect is set to all or singular; ldu >= 1 otherwise.\\n\nThe leading dimension of U_j.\n@param[in]\nstrideU     rocblas_stride.\\n\nStride from the start of one matrix U_j to the next one U_(j+1).\nThere is no restriction for the value of strideU.\nNormal use case is strideU >= ldu*min(m,n) if left_svect is set to singular,\nor strideU >= ldu*m when left_svect is equal to all.\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV). \\n\nThe matrices V_j of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is set to all; ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\\n\nThe leading dimension of V.\n@param[in]\nstrideV     rocblas_stride.\\n\nStride from the start of one matrix V_j to the next one V_(j+1).\nThere is no restriction for the value of strideV.\nNormal use case is strideV >= ldv*n.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info[j] = 0, successful exit. If info[j] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GESVDJ_BATCHED computes the singular values and optionally the\nsingular vectors of a batch of general m-by-n matrix A (Singular Value\nDecomposition).\n\n\\details\nThe SVD of matrix A_l in the batch is given by:\n\n\\f[\nA_l = U_l  S_l  V_l'\n\\f]\n\nwhere the m-by-n matrix \\f$S_l\\f$ is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_l\\f$. \\f$U_l\\f$ and \\f$V_l\\f$ are\northogonal (unitary) matrices. The first min(m,n) columns of \\f$U_l\\f$ and \\f$V_l\\f$ are\nthe left and right singular vectors of \\f$A_l\\f$, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_l'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix \\f$U_l\\f$ (or \\f$V_l'\\f$) is computed,\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of \\f$U_l\\f$ or rows of \\f$V_l'\\f$) are computed, or\n- rocblas_svect_none: no columns (or rows) of \\f$U_l\\f$ (or \\f$V_l'\\f$) are computed,\ni.e. no singular vectors.\n\nThe singular values are computed by applying QR factorization to \\f$A_lV_l\\f$ if m >= n\n(resp. LQ factorization to \\f$U_l'A_l\\f$ if m < n), where \\f$V_l\\f$ (resp. \\f$U_l\\f$) is\nfound as the eigenvectors of \\f$A_l'A_l\\f$ (resp. 
\\f$A_lA_l'\\f$) using the Jacobi\neigenvalue algorithm.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\nSpecifies how the left singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nright_svect #rocblas_svect.\nSpecifies how the right singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on\nthe GPU of dimension lda*n.\nOn entry, the matrices A_l.\nOn exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\nThe leading dimension of A_l.\n@param[in]\nabstol      real type.\nThe absolute tolerance. The algorithm is considered to have converged once off(A_l'A_l)\nis <= norm(A_l'A_l) * abstol [resp. off(A_lA_l') <= norm(A_lA_l') * abstol]. If abstol <= 0,\nthen the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type on the GPU.\nThe Frobenius norm of the off-diagonal elements of A_l'A_l (resp. A_lA_l') at the final\niteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nS           pointer to real type. 
Array on the GPU (the size depends on the value of strideS).\nThe singular values of A_l in decreasing order.\n@param[in]\nstrideS     rocblas_stride.\nStride from the start of one vector S_l to the next one S_(l+1).\nThere is no restriction for the value of strideS.\nNormal use case is strideS >= min(m,n).\n@param[out]\nU           pointer to type. Array on the GPU (the size depends on the value of strideU).\nThe matrices U_l of left singular vectors stored as columns.\nNot referenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect is set to all or singular; ldu >= 1 otherwise.\nThe leading dimension of U_l.\n@param[in]\nstrideU     rocblas_stride.\nStride from the start of one matrix U_l to the next one U_(l+1).\nThere is no restriction for the value of strideU.\nNormal use case is strideU >= ldu*min(m,n) if left_svect is set to singular,\nor strideU >= ldu*m when left_svect is equal to all.\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV).\nThe matrices V_l of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is set to all; ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\nThe leading dimension of V_l.\n@param[in]\nstrideV     rocblas_stride.\nStride from the start of one matrix V_l to the next one V_(l+1).\nThere is no restriction for the value of strideV.\nNormal use case is strideV >= ldv*n.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info[l] = 0, successful exit. If info[l] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgesvdj_batched(
         handle: rocblas_handle,
         left_svect: rocblas_svect,
@@ -6757,7 +6776,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESVDJ_STRIDED_BATCHED computes the singular values and optionally the\nsingular vectors of a batch of general m-by-n matrix A (Singular Value\nDecomposition).\n\n\\details\nThe SVD of matrix A_j in the batch is given by:\n\n\\f[\nA_j = U_j  S_j  V_j'\n\\f]\n\nwhere the m-by-n matrix \\f$S_j\\f$ is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_j\\f$. \\f$U_j\\f$ and \\f$V_j\\f$ are\northogonal (unitary) matrices. The first min(m,n) columns of \\f$U_j\\f$ and \\f$V_j\\f$ are\nthe left and right singular vectors of \\f$A_j\\f$, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_j'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix \\f$U_j\\f$ (or \\f$V_j'\\f$) is computed,\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of \\f$U_j\\f$ or rows of \\f$V_j'\\f$) are computed, or\n- rocblas_svect_none: no columns (or rows) of \\f$U_j\\f$ (or \\f$V_j'\\f$) are computed,\ni.e. no singular vectors.\n\nThe singular values are computed by applying QR factorization to \\f$A_jV_j\\f$ if m >= n\n(resp. LQ factorization to \\f$U_j'A_j\\f$ if m < n), where \\f$V_j\\f$ (resp. \\f$U_j\\f$) is\nfound as the eigenvectors of \\f$A_j'A_j\\f$ (resp. 
\\f$A_jA_j'\\f$) using the Jacobi\neigenvalue algorithm.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\\n\nSpecifies how the left singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nright_svect #rocblas_svect.\\n\nSpecifies how the right singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of all matrices A_j in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j.\nOn exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nThe leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA.\nNormal use case is strideA >= lda*n.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(A_j'A_j)\nis <= norm(A_j'A_j) * abstol [resp. off(A_jA_j') <= norm(A_jA_j') * abstol]. If abstol <= 0,\nthen the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of A_j'A_j (resp. A_jA_j') at the final\niteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nS           pointer to real type. 
Array on the GPU (the size depends on the value of strideS).\\n\nThe singular values of A_j in decreasing order.\n@param[in]\nstrideS     rocblas_stride.\\n\nStride from the start of one vector S_j to the next one S_(j+1).\nThere is no restriction for the value of strideS.\nNormal use case is strideS >= min(m,n).\n@param[out]\nU           pointer to type. Array on the GPU (the side depends on the value of strideU). \\n\nThe matrices U_j of left singular vectors stored as columns.\nNot referenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect is set to all or singular; ldu >= 1 otherwise.\\n\nThe leading dimension of U_j.\n@param[in]\nstrideU     rocblas_stride.\\n\nStride from the start of one matrix U_j to the next one U_(j+1).\nThere is no restriction for the value of strideU.\nNormal use case is strideU >= ldu*min(m,n) if left_svect is set to singular,\nor strideU >= ldu*m when left_svect is equal to all.\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV). \\n\nThe matrices V_j of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is set to all; ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\\n\nThe leading dimension of V.\n@param[in]\nstrideV     rocblas_stride.\\n\nStride from the start of one matrix V_j to the next one V_(j+1).\nThere is no restriction for the value of strideV.\nNormal use case is strideV >= ldv*n.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info[j] = 0, successful exit. If info[j] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GESVDJ_STRIDED_BATCHED computes the singular values and optionally the\nsingular vectors of a batch of general m-by-n matrix A (Singular Value\nDecomposition).\n\n\\details\nThe SVD of matrix A_l in the batch is given by:\n\n\\f[\nA_l = U_l  S_l  V_l'\n\\f]\n\nwhere the m-by-n matrix \\f$S_l\\f$ is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_l\\f$. \\f$U_l\\f$ and \\f$V_l\\f$ are\northogonal (unitary) matrices. The first min(m,n) columns of \\f$U_l\\f$ and \\f$V_l\\f$ are\nthe left and right singular vectors of \\f$A_l\\f$, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_l'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that can take the\nfollowing values:\n\n- rocblas_svect_all: the entire matrix \\f$U_l\\f$ (or \\f$V_l'\\f$) is computed,\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of \\f$U_l\\f$ or rows of \\f$V_l'\\f$) are computed, or\n- rocblas_svect_none: no columns (or rows) of \\f$U_l\\f$ (or \\f$V_l'\\f$) are computed,\ni.e. no singular vectors.\n\nThe singular values are computed by applying QR factorization to \\f$A_lV_l\\f$ if m >= n\n(resp. LQ factorization to \\f$U_l'A_l\\f$ if m < n), where \\f$V_l\\f$ (resp. \\f$U_l\\f$) is\nfound as the eigenvectors of \\f$A_l'A_l\\f$ (resp. 
\\f$A_lA_l'\\f$) using the Jacobi\neigenvalue algorithm.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\nSpecifies how the left singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nright_svect #rocblas_svect.\nSpecifies how the right singular vectors are computed.\nrocblas_svect_overwrite is not supported.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of all matrices A_l in the batch.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l.\nOn exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\nThe leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA.\nNormal use case is strideA >= lda*n.\n@param[in]\nabstol      real type.\nThe absolute tolerance. The algorithm is considered to have converged once off(A_l'A_l)\nis <= norm(A_l'A_l) * abstol [resp. off(A_lA_l') <= norm(A_lA_l') * abstol]. If abstol <= 0,\nthen the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type on the GPU.\nThe Frobenius norm of the off-diagonal elements of A_l'A_l (resp. A_lA_l') at the final\niteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nS           pointer to real type. 
Array on the GPU (the size depends on the value of strideS).\nThe singular values of A_l in decreasing order.\n@param[in]\nstrideS     rocblas_stride.\nStride from the start of one vector S_l to the next one S_(l+1).\nThere is no restriction for the value of strideS.\nNormal use case is strideS >= min(m,n).\n@param[out]\nU           pointer to type. Array on the GPU (the size depends on the value of strideU).\nThe matrices U_l of left singular vectors stored as columns.\nNot referenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect is set to all or singular; ldu >= 1 otherwise.\nThe leading dimension of U_l.\n@param[in]\nstrideU     rocblas_stride.\nStride from the start of one matrix U_l to the next one U_(l+1).\nThere is no restriction for the value of strideU.\nNormal use case is strideU >= ldu*min(m,n) if left_svect is set to singular,\nor strideU >= ldu*m when left_svect is equal to all.\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV).\nThe matrices V_l of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. ldv >= n if right_svect is set to all; ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\nThe leading dimension of V_l.\n@param[in]\nstrideV     rocblas_stride.\nStride from the start of one matrix V_l to the next one V_(l+1).\nThere is no restriction for the value of strideV.\nNormal use case is strideV >= ldv*n.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info[l] = 0, successful exit. If info[l] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgesvdj_strided_batched(
         handle: rocblas_handle,
         left_svect: rocblas_svect,
@@ -6866,7 +6885,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESVDX computes a set of singular values and optionally the corresponding singular\nvectors of a general m-by-n matrix A (partial Singular Value Decomposition).\n\n\\details\nThis function computes all the singular values of A, all the singular values in the half-open interval\n\\f$[vl, vu)\\f$, or the il-th through iu-th singular values, depending on the value of srange.\n\nThe full SVD of matrix A is given by:\n\n\\f[\nA = U  S  V'\n\\f]\n\nwhere the m-by-n matrix S is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of A. U and V are orthogonal\n(unitary) matrices. The first min(m,n) columns of U and V are the left and\nright singular vectors of A, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of V'.\n\nleft_svect and right_svect are #rocblas_svect enums that, for this function, can take the\nfollowing values:\n\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of U or rows of V') corresponding to the computed singular values are computed,\n- rocblas_svect_none: no columns (or rows) of U (or V') are computed, i.e.\nno singular vectors.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\\n\nSpecifies if the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\\n\nSpecifies if the right singular vectors are computed.\n@param[in]\nsrange      #rocblas_srange.\\n\nSpecifies the type of range or interval of the singular values to be computed.\n@param[in]\nm           rocblas_int. m >= 0.\\n\nThe number of rows of matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of matrix A.\n@param[inout]\nA           pointer to type. 
Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A.\nOn exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nThe leading dimension of A.\n@param[in]\nvl          real type. 0 <= vl < vu.\\n\nThe lower bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A or the singular values within a set of indices.\n@param[in]\nvu          real type. 0 <= vl < vu.\\n\nThe upper bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A or the singular values within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the largest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A or the singular values in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A or the singular values in a half-open interval.\n@param[out]\nnsv         pointer to a rocblas_int on the GPU. \\n\nThe total number of singular values found. If srange is rocblas_srange_all, nsv = min(m,n).\nIf srange is rocblas_srange_index, nsv = iu - il + 1. Otherwise, 0 <= nsv <= min(m,n).\n@param[out]\nS           pointer to real type. Array on the GPU of dimension min(m,n). \\n\nThe first nsv elements contain the computed singular values in descending order.\n(The remaining elements may be used as workspace for internal computations).\n@param[out]\nU           pointer to type. Array on the GPU of dimension ldu*min(m,n).\\n\nThe matrix of left singular vectors stored as columns. Not\nreferenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. 
ldu >= m if left_svect singular; ldu >= 1 otherwise.\\n\nThe leading dimension of U.\n@param[out]\nV           pointer to type. Array on the GPU of dimension ldv*n. \\n\nThe matrix of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\\n\nThe leading dimension of V.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU of dimension min(m,n).\\n\nIf info = 0, the first nsv elements of ifail are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge, as returned by \\ref rocsolver_sbdsvdx \"BDSVDX\".\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, i eigenvectors did not converge in \\ref rocsolver_sbdsvdx \"BDSVDX\"; their\nindices are stored in ifail."]
+    #[doc = " @{\n\\brief GESVDX computes a set of singular values and optionally the corresponding singular\nvectors of a general m-by-n matrix A (partial Singular Value Decomposition).\n\n\\details\nThis function computes all the singular values of A, all the singular values in the half-open interval\n\\f$[vl, vu)\\f$, or the il-th through iu-th singular values, depending on the value of srange.\n\nThe full SVD of matrix A is given by:\n\n\\f[\nA = U  S  V'\n\\f]\n\nwhere the m-by-n matrix S is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of A. U and V are orthogonal\n(unitary) matrices. The first min(m,n) columns of U and V are the left and\nright singular vectors of A, respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of V'.\n\nleft_svect and right_svect are #rocblas_svect enums that, for this function, can take the\nfollowing values:\n\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of U or rows of V') corresponding to the computed singular values are computed,\n- rocblas_svect_none: no columns (or rows) of U (or V') are computed, i.e.\nno singular vectors.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\nSpecifies if the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\nSpecifies if the right singular vectors are computed.\n@param[in]\nsrange      #rocblas_srange.\nSpecifies the type of range or interval of the singular values to be computed.\n@param[in]\nm           rocblas_int. m >= 0.\nThe number of rows of matrix A.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of matrix A.\n@param[inout]\nA           pointer to type. 
Array on the GPU of dimension lda*n.\nOn entry, the matrix A.\nOn exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\nThe leading dimension of A.\n@param[in]\nvl          real type. 0 <= vl < vu.\nThe lower bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A or the singular values within a set of indices.\n@param[in]\nvu          real type. 0 <= vl < vu.\nThe upper bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A or the singular values within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A or the singular values in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A or the singular values in a half-open interval.\n@param[out]\nnsv         pointer to a rocblas_int on the GPU.\nThe total number of singular values found. If srange is rocblas_srange_all, nsv = min(m,n).\nIf srange is rocblas_srange_index, nsv = iu - il + 1. Otherwise, 0 <= nsv <= min(m,n).\n@param[out]\nS           pointer to real type. Array on the GPU of dimension nsv.\nThe first nsv elements contain the computed singular values in descending order.\nNote: If srange is rocblas_srange_value, then the value of nsv is not known in advance.\nIn this case, the user should ensure that S is large enough to hold min(m,n) values.\n@param[out]\nU           pointer to type. Array on the GPU of dimension ldu*nsv.\nThe matrix of left singular vectors stored as columns. 
Not\nreferenced if left_svect is set to none.\nNote: If srange is rocblas_srange_value, then the value of nsv is not known in advance.\nIn this case, the user should ensure that U is large enough to hold min(m,n) columns.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect singular; ldu >= 1 otherwise.\nThe leading dimension of U.\n@param[out]\nV           pointer to type. Array on the GPU of dimension ldv*n.\nThe matrix of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. ldv >= nsv if right_svect is set to singular; or ldv >= 1 otherwise.\nThe leading dimension of V.\nNote: If srange is rocblas_srange_value, then the value of nsv is not known in advance.\nIn this case, the user should ensure that V is large enough to hold min(m,n) rows.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU of dimension min(m,n).\nIf info = 0, the first nsv elements of ifail are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge, as returned by \\ref rocsolver_sbdsvdx \"BDSVDX\".\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, i eigenvectors did not converge in \\ref rocsolver_sbdsvdx \"BDSVDX\"; their\nindices are stored in ifail."]
     pub fn rocsolver_sgesvdx(
         handle: rocblas_handle,
         left_svect: rocblas_svect,
@@ -6967,7 +6986,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESVDX_BATCHED computes a set of singular values and optionally the corresponding singular\nvectors of a batch of general m-by-n matrices \\f$A_j\\f$ (partial Singular Value Decomposition).\n\n\\details\nThis function computes all the singular values of \\f$A_j\\f$, all the singular values in the half-open interval\n\\f$[vl, vu)\\f$, or the il-th through iu-th singular values, depending on the value of srange.\n\nThe full SVD of matrix \\f$A_j\\f$  is given by:\n\n\\f[\nA_j = U_j  S_j  V_j'\n\\f]\n\nwhere the m-by-n matrix \\f$S_j\\f$  is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_j\\f$ . \\f$U_j\\f$  and \\f$V_j\\f$  are orthogonal\n(unitary) matrices. The first min(m,n) columns of \\f$U_j\\f$  and \\f$V_j\\f$  are the left and\nright singular vectors of \\f$A_j\\f$ , respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_j'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that, for this function, can take the\nfollowing values:\n\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of \\f$U_j\\f$  or rows of \\f$V_j'\\f$ ) corresponding to the computed singular values are computed,\n- rocblas_svect_none: no columns (or rows) of \\f$U_j\\f$  (or \\f$V_j'\\f$ ) are computed, i.e.\nno singular vectors.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\\n\nSpecifies if the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\\n\nSpecifies if the right singular vectors are computed.\n@param[in]\nsrange      #rocblas_srange.\\n\nSpecifies the type of range or interval of the singular values to be computed.\n@param[in]\nm           rocblas_int. 
m >= 0.\\n\nThe number of rows of matrix A_j.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of matrix A_j.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j.\nOn exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nThe leading dimension of A_j.\n@param[in]\nvl          real type. 0 <= vl < vu.\\n\nThe lower bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A_j or the singular values within a set of indices.\n@param[in]\nvu          real type. 0 <= vl < vu.\\n\nThe upper bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A_j or the singular values within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the largest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A_j or the singular values in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A_j or the singular values in a half-open interval.\n@param[out]\nnsv         pointer to rocblas_int. Array of batch_count integers on the GPU. \\n\nThe total number of singular values found. If srange is rocblas_srange_all, nsv[j] = min(m,n).\nIf srange is rocblas_srange_index, nsv[j] = iu - il + 1. Otherwise, 0 <= nsv[j] <= min(m,n).\n@param[out]\nS           pointer to real type. Array on the GPU (the size depends on the value of strideS). 
\\n\nThe first nsv_j elements contain the computed singular values in descending order.\n(The remaining elements may be used as workspace for internal computations).\n@param[in]\nstrideS     rocblas_stride.\\n\nStride from the start of one vector S_j to the next one S_(j+1).\nThere is no restriction for the value of strideS. Normal use case is strideS >= min(m,n).\n@param[out]\nU           pointer to type. Array on the GPU (the size depends on the value of strideU).\\n\nThe matrix U_j of left singular vectors stored as columns. Not\nreferenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect singular; ldu >= 1 otherwise.\\n\nThe leading dimension of U_j.\n@param[in]\nstrideU     rocblas_stride.\\n\nStride from the start of one matrix U_j to the next one U_(j+1).\nThere is no restriction for the value of strideU. Normal use case is strideU >= ldu*min(m,n).\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV). \\n\nThe matrix V_j of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\\n\nThe leading dimension of V_j.\n@param[in]\nstrideV     rocblas_stride.\\n\nStride from the start of one matrix V_j to the next one V_(j+1).\nThere is no restriction for the value of strideV. Normal use case is strideV >= ldv*n.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\\n\nIf info[j] = 0, the first nsv[j] elements of ifail_j are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge, as returned by \\ref rocsolver_sbdsvdx \"BDSVDX\".\n@param[in]\nstrideF     rocblas_stride.\\n\nStride from the start of one vector ifail_j to the next one ifail_(j+1).\nThere is no restriction for the value of strideF. 
Normal use case is strideF >= min(m,n).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info[j] = 0, successful exit.\nIf info[j] = i > 0, i eigenvectors did not converge in \\ref rocsolver_sbdsvdx \"BDSVDX\"; their\nindices are stored in ifail_j.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GESVDX_BATCHED computes a set of singular values and optionally the corresponding singular\nvectors of a batch of general m-by-n matrices \\f$A_l\\f$ (partial Singular Value Decomposition).\n\n\\details\nThis function computes all the singular values of \\f$A_l\\f$, all the singular values in the half-open interval\n\\f$[vl, vu)\\f$, or the il-th through iu-th singular values, depending on the value of srange.\n\nThe full SVD of matrix \\f$A_l\\f$  is given by:\n\n\\f[\nA_l = U_l  S_l  V_l'\n\\f]\n\nwhere the m-by-n matrix \\f$S_l\\f$  is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_l\\f$ . \\f$U_l\\f$  and \\f$V_l\\f$  are orthogonal\n(unitary) matrices. The first min(m,n) columns of \\f$U_l\\f$  and \\f$V_l\\f$  are the left and\nright singular vectors of \\f$A_l\\f$ , respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_l'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that, for this function, can take the\nfollowing values:\n\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of \\f$U_l\\f$  or rows of \\f$V_l'\\f$ ) corresponding to the computed singular values are computed,\n- rocblas_svect_none: no columns (or rows) of \\f$U_l\\f$  (or \\f$V_l'\\f$ ) are computed, i.e.\nno singular vectors.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\nSpecifies if the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\nSpecifies if the right singular vectors are computed.\n@param[in]\nsrange      #rocblas_srange.\nSpecifies the type of range or interval of the singular values to be computed.\n@param[in]\nm           rocblas_int. 
m >= 0.\nThe number of rows of matrix A_l.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of matrix A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l.\nOn exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\nThe leading dimension of A_l.\n@param[in]\nvl          real type. 0 <= vl < vu.\nThe lower bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A_l or the singular values within a set of indices.\n@param[in]\nvu          real type. 0 <= vl < vu.\nThe upper bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A_l or the singular values within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A_l or the singular values in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A_l or the singular values in a half-open interval.\n@param[out]\nnsv         pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe total number of singular values found. If srange is rocblas_srange_all, nsv[l] = min(m,n).\nIf srange is rocblas_srange_index, nsv[l] = iu - il + 1. Otherwise, 0 <= nsv[l] <= min(m,n).\n@param[out]\nS           pointer to real type. 
Array on the GPU (the size depends on the value of strideS).\nThe first nsv_l elements contain the computed singular values in descending order.\n(The remaining elements may be used as workspace for internal computations).\n@param[in]\nstrideS     rocblas_stride.\nStride from the start of one vector S_l to the next one S_(l+1).\nThere is no restriction for the value of strideS. Normal use case is strideS >= nsv_l.\nNote: If srange is rocblas_srange_value, then the value of nsv_l is not known in advance.\nIn this case, the user should ensure that S_l is large enough to hold min(m,n) values.\n@param[out]\nU           pointer to type. Array on the GPU (the size depends on the value of strideU).\nThe matrix U_l of left singular vectors stored as columns. Not\nreferenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect singular; ldu >= 1 otherwise.\nThe leading dimension of U_l.\n@param[in]\nstrideU     rocblas_stride.\nStride from the start of one matrix U_l to the next one U_(l+1).\nThere is no restriction for the value of strideU. Normal use case is strideU >= ldu*nsv_l.\nNote: If srange is rocblas_srange_value, then the value of nsv_l is not known in advance.\nIn this case, the user should ensure that U_l is large enough to hold min(m,n) columns.\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV).\nThe matrix V_l of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. 
ldv >= nsv_l if right_svect is set to singular; or ldv >= 1 otherwise.\nThe leading dimension of V_l.\nNote: If srange is rocblas_srange_value, then the value of nsv_l is not known in advance.\nIn this case, the user should ensure that V_l is large enough to hold min(m,n) rows.\n@param[in]\nstrideV     rocblas_stride.\nStride from the start of one matrix V_l to the next one V_(l+1).\nThere is no restriction for the value of strideV. Normal use case is strideV >= ldv*n.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\nIf info[l] = 0, the first nsv[l] elements of ifail_l are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge, as returned by \\ref rocsolver_sbdsvdx \"BDSVDX\".\n@param[in]\nstrideF     rocblas_stride.\nStride from the start of one vector ifail_l to the next one ifail_(l+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= min(m,n).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info[l] = 0, successful exit.\nIf info[l] = i > 0, i eigenvectors did not converge in \\ref rocsolver_sbdsvdx \"BDSVDX\"; their\nindices are stored in ifail_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgesvdx_batched(
         handle: rocblas_handle,
         left_svect: rocblas_svect,
@@ -7088,7 +7107,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GESVDX_STRIDED_BATCHED computes a set of singular values and optionally the corresponding singular\nvectors of a batch of general m-by-n matrices \\f$A_j\\f$ (partial Singular Value Decomposition).\n\n\\details\nThis function computes all the singular values of \\f$A_j\\f$, all the singular values in the half-open interval\n\\f$[vl, vu)\\f$, or the il-th through iu-th singular values, depending on the value of srange.\n\nThe full SVD of matrix \\f$A_j\\f$  is given by:\n\n\\f[\nA_j = U_j  S_j  V_j'\n\\f]\n\nwhere the m-by-n matrix \\f$S_j\\f$  is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_j\\f$ . \\f$U_j\\f$  and \\f$V_j\\f$  are orthogonal\n(unitary) matrices. The first min(m,n) columns of \\f$U_j\\f$  and \\f$V_j\\f$  are the left and\nright singular vectors of \\f$A_j\\f$ , respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_j'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that, for this function, can take the\nfollowing values:\n\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of \\f$U_j\\f$  or rows of \\f$V_j'\\f$ ) corresponding to the computed singular values are computed,\n- rocblas_svect_none: no columns (or rows) of \\f$U_j\\f$  (or \\f$V_j'\\f$ ) are computed, i.e.\nno singular vectors.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\\n\nSpecifies if the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\\n\nSpecifies if the right singular vectors are computed.\n@param[in]\nsrange      #rocblas_srange.\\n\nSpecifies the type of range or interval of the singular values to be computed.\n@param[in]\nm           rocblas_int. 
m >= 0.\\n\nThe number of rows of matrix A_j.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of columns of matrix A_j.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j.\nOn exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\\n\nThe leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nvl          real type. 0 <= vl < vu.\\n\nThe lower bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A_j or the singular values within a set of indices.\n@param[in]\nvu          real type. 0 <= vl < vu.\\n\nThe upper bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A_j or the singular values within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the largest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A_j or the singular values in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A_j or the singular values in a half-open interval.\n@param[out]\nnsv         pointer to rocblas_int. Array of batch_count integers on the GPU. \\n\nThe total number of singular values found. If srange is rocblas_srange_all, nsv[j] = min(m,n).\nIf srange is rocblas_srange_index, nsv[j] = iu - il + 1. Otherwise, 0 <= nsv[j] <= min(m,n).\n@param[out]\nS           pointer to real type. Array on the GPU (the size depends on the value of strideS). 
\\n\nThe first nsv_j elements contain the computed singular values in descending order.\n(The remaining elements may be used as workspace for internal computations).\n@param[in]\nstrideS     rocblas_stride.\\n\nStride from the start of one vector S_j to the next one S_(j+1).\nThere is no restriction for the value of strideS. Normal use case is strideS >= min(m,n).\n@param[out]\nU           pointer to type. Array on the GPU (the size depends on the value of strideU).\\n\nThe matrix U_j of left singular vectors stored as columns. Not\nreferenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect singular; ldu >= 1 otherwise.\\n\nThe leading dimension of U_j.\n@param[in]\nstrideU     rocblas_stride.\\n\nStride from the start of one matrix U_j to the next one U_(j+1).\nThere is no restriction for the value of strideU. Normal use case is strideU >= ldu*min(m,n).\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV). \\n\nThe matrix V_j of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. ldv >= min(m,n) if right_svect is\nset to singular; or ldv >= 1 otherwise.\\n\nThe leading dimension of V_j.\n@param[in]\nstrideV     rocblas_stride.\\n\nStride from the start of one matrix V_j to the next one V_(j+1).\nThere is no restriction for the value of strideV. Normal use case is strideV >= ldv*n.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\\n\nIf info[j] = 0, the first nsv[j] elements of ifail_j are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge, as returned by \\ref rocsolver_sbdsvdx \"BDSVDX\".\n@param[in]\nstrideF     rocblas_stride.\\n\nStride from the start of one vector ifail_j to the next one ifail_(j+1).\nThere is no restriction for the value of strideF. 
Normal use case is strideF >= min(m,n).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info[j] = 0, successful exit.\nIf info[j] = i > 0, i eigenvectors did not converge in \\ref rocsolver_sbdsvdx \"BDSVDX\"; their\nindices are stored in ifail_j.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GESVDX_STRIDED_BATCHED computes a set of singular values and optionally the corresponding singular\nvectors of a batch of general m-by-n matrices \\f$A_l\\f$ (partial Singular Value Decomposition).\n\n\\details\nThis function computes all the singular values of \\f$A_l\\f$, all the singular values in the half-open interval\n\\f$[vl, vu)\\f$, or the il-th through iu-th singular values, depending on the value of srange.\n\nThe full SVD of matrix \\f$A_l\\f$  is given by:\n\n\\f[\nA_l = U_l  S_l  V_l'\n\\f]\n\nwhere the m-by-n matrix \\f$S_l\\f$  is zero except, possibly, for its min(m,n)\ndiagonal elements, which are the singular values of \\f$A_l\\f$ . \\f$U_l\\f$  and \\f$V_l\\f$  are orthogonal\n(unitary) matrices. The first min(m,n) columns of \\f$U_l\\f$  and \\f$V_l\\f$  are the left and\nright singular vectors of \\f$A_l\\f$ , respectively.\n\nThe computation of the singular vectors is optional and it is controlled by\nthe function arguments left_svect and right_svect as described below. When\ncomputed, this function returns the transpose (or transpose conjugate) of the\nright singular vectors, i.e. the rows of \\f$V_l'\\f$.\n\nleft_svect and right_svect are #rocblas_svect enums that, for this function, can take the\nfollowing values:\n\n- rocblas_svect_singular: the singular vectors (first min(m,n)\ncolumns of \\f$U_l\\f$  or rows of \\f$V_l'\\f$ ) corresponding to the computed singular values are computed,\n- rocblas_svect_none: no columns (or rows) of \\f$U_l\\f$  (or \\f$V_l'\\f$ ) are computed, i.e.\nno singular vectors.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nleft_svect  #rocblas_svect.\nSpecifies if the left singular vectors are computed.\n@param[in]\nright_svect #rocblas_svect.\nSpecifies if the right singular vectors are computed.\n@param[in]\nsrange      #rocblas_srange.\nSpecifies the type of range or interval of the singular values to be computed.\n@param[in]\nm           rocblas_int. 
m >= 0.\nThe number of rows of matrix A_l.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of columns of matrix A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l.\nOn exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= m.\nThe leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nvl          real type. 0 <= vl < vu.\nThe lower bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A_l or the singular values within a set of indices.\n@param[in]\nvu          real type. 0 <= vl < vu.\nThe upper bound of the search interval [vl, vu). Ignored if srange indicates to look\nfor all the singular values of A_l or the singular values within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A_l or the singular values in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest singular value to be computed. Ignored if srange indicates to look\nfor all the singular values of A_l or the singular values in a half-open interval.\n@param[out]\nnsv         pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe total number of singular values found. If srange is rocblas_srange_all, nsv[l] = min(m,n).\nIf srange is rocblas_srange_index, nsv[l] = iu - il + 1. Otherwise, 0 <= nsv[l] <= min(m,n).\n@param[out]\nS           pointer to real type. 
Array on the GPU (the size depends on the value of strideS).\nThe first nsv_l elements contain the computed singular values in descending order.\n(The remaining elements may be used as workspace for internal computations).\n@param[in]\nstrideS     rocblas_stride.\nStride from the start of one vector S_l to the next one S_(l+1).\nThere is no restriction for the value of strideS. Normal use case is strideS >= nsv_l.\nNote: If srange is rocblas_srange_value, then the value of nsv_l is not known in advance.\nIn this case, the user should ensure that S_l is large enough to hold min(m,n) values.\n@param[out]\nU           pointer to type. Array on the GPU (the size depends on the value of strideU).\nThe matrix U_l of left singular vectors stored as columns. Not\nreferenced if left_svect is set to none.\n@param[in]\nldu         rocblas_int. ldu >= m if left_svect singular; ldu >= 1 otherwise.\nThe leading dimension of U_l.\n@param[in]\nstrideU     rocblas_stride.\nStride from the start of one matrix U_l to the next one U_(l+1).\nThere is no restriction for the value of strideU. Normal use case is strideU >= ldu*nsv_l.\nNote: If srange is rocblas_srange_value, then the value of nsv_l is not known in advance.\nIn this case, the user should ensure that U_l is large enough to hold min(m,n) columns.\n@param[out]\nV           pointer to type. Array on the GPU (the size depends on the value of strideV).\nThe matrix V_l of right singular vectors stored as rows (transposed / conjugate-transposed).\nNot referenced if right_svect is set to none.\n@param[in]\nldv         rocblas_int. 
ldv >= nsv_l if right_svect is set to singular; or ldv >= 1 otherwise.\nThe leading dimension of V_l.\nNote: If srange is rocblas_srange_value, then the value of nsv_l is not known in advance.\nIn this case, the user should ensure that V_l is large enough to hold min(m,n) rows.\n@param[in]\nstrideV     rocblas_stride.\nStride from the start of one matrix V_l to the next one V_(l+1).\nThere is no restriction for the value of strideV. Normal use case is strideV >= ldv*n.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\nIf info[l] = 0, the first nsv[l] elements of ifail_l are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge, as returned by \\ref rocsolver_sbdsvdx \"BDSVDX\".\n@param[in]\nstrideF     rocblas_stride.\nStride from the start of one vector ifail_l to the next one ifail_(l+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= min(m,n).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info[l] = 0, successful exit.\nIf info[l] = i > 0, i eigenvectors did not converge in \\ref rocsolver_sbdsvdx \"BDSVDX\"; their\nindices are stored in ifail_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgesvdx_strided_batched(
         handle: rocblas_handle,
         left_svect: rocblas_svect,
@@ -7213,7 +7232,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTD2 computes the tridiagonal form of a real symmetric matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form is given by:\n\n\\f[\nT = Q'  A  Q\n\\f]\n\nwhere T is symmetric tridiagonal and Q is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H_1H_2\\cdots H_{n-1} & \\: \\text{if uplo indicates lower, or}\\\\\nQ = H_{n-1}H_{n-2}\\cdots H_1 & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{tau}[i] \\cdot v_i  v_i'\n\\f]\n\nwhere tau[i] is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_i stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_i stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A.\n@param[out]\nD           pointer to type. 
Array on the GPU of dimension n.\\n\nThe diagonal elements of T.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n-1.\\n\nThe off-diagonal elements of T.\n@param[out]\ntau         pointer to type. Array on the GPU of dimension n-1.\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief SYTD2 computes the tridiagonal form of a real symmetric matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form is given by:\n\n\\f[\nT = Q'  A  Q\n\\f]\n\nwhere T is symmetric tridiagonal and Q is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H(1)H(2)\\cdots H(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ = H(n-1)H(n-2)\\cdots H(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{tau}[i] \\cdot v_i^{}  v_i'\n\\f]\n\nwhere tau[i] is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_i stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_i stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A.\n@param[out]\nD           pointer to type. 
Array on the GPU of dimension n.\nThe diagonal elements of T.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n-1.\nThe off-diagonal elements of T.\n@param[out]\ntau         pointer to type. Array on the GPU of dimension n-1.\nThe Householder scalars."]
     pub fn rocsolver_ssytd2(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7240,7 +7259,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HETD2 computes the tridiagonal form of a complex hermitian matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form is given by:\n\n\\f[\nT = Q'  A  Q\n\\f]\n\nwhere T is hermitian tridiagonal and Q is an unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H_1H_2\\cdots H_{n-1} & \\: \\text{if uplo indicates lower, or}\\\\\nQ = H_{n-1}H_{n-2}\\cdots H_1 & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{tau}[i] \\cdot v_i  v_i'\n\\f]\n\nwhere tau[i] is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T; the elements above the superdiagonal contain\nthe first i-1 elements of the Householders vector v_i stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_i stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A.\n@param[out]\nD           pointer to real type. 
Array on the GPU of dimension n.\\n\nThe diagonal elements of T.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n-1.\\n\nThe off-diagonal elements of T.\n@param[out]\ntau         pointer to type. Array on the GPU of dimension n-1.\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief HETD2 computes the tridiagonal form of a complex hermitian matrix A.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form is given by:\n\n\\f[\nT = Q'  A  Q\n\\f]\n\nwhere T is hermitian tridiagonal and Q is a unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H(1)H(2)\\cdots H(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ = H(n-1)H(n-2)\\cdots H(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{tau}[i] \\cdot v_i^{}  v_i'\n\\f]\n\nwhere tau[i] is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_i stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_i stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A.\n@param[out]\nD           pointer to real type. 
Array on the GPU of dimension n.\nThe diagonal elements of T.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n-1.\nThe off-diagonal elements of T.\n@param[out]\ntau         pointer to type. Array on the GPU of dimension n-1.\nThe Householder scalars."]
     pub fn rocsolver_chetd2(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7267,7 +7286,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTD2_BATCHED computes the tridiagonal form of a batch of real symmetric matrices A_j.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_j\\f$ is given by:\n\n\\f[\nT_j = Q_j'  A_j  Q_j\n\\f]\n\nwhere \\f$T_j\\f$ is symmetric tridiagonal and \\f$Q_j\\f$ is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{n-1}} & \\: \\text{if uplo indicates lower, or}\\\\\nQ_j = H_{j_{n-1}}H_{j_{n-2}}\\cdots H_{j_1} & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{tau}_j[i] \\cdot v_{j_i}  v_{j_i}'\n\\f]\n\nwhere \\f$\\text{tau}_j[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrices A_j.\n@param[inout]\nA           array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_j; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(j_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_j; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(j_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A_j.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of T_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of T_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors tau_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector tau_j to the next one tau_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYTD2_BATCHED computes the tridiagonal form of a batch of real symmetric matrices A_l.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_l\\f$ is given by:\n\n\\f[\nT_l^{} = Q_l'  A_l^{}  Q_l^{}\n\\f]\n\nwhere \\f$T_l\\f$ is symmetric tridiagonal and \\f$Q_l\\f$ is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ_l = H_l(n-1)H_l(n-2)\\cdots H_l(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{tau}_l^{}[i] \\cdot v_{l_i}^{}  v_{l_i}'\n\\f]\n\nwhere \\f$\\text{tau}_l[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrices A_l.\n@param[inout]\nA           array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_l; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(l_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_l; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(l_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A_l.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of T_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of T_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors tau_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector tau_l to the next one tau_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssytd2_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7302,7 +7321,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HETD2_BATCHED computes the tridiagonal form of a batch of complex hermitian matrices A_j.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_j\\f$ is given by:\n\n\\f[\nT_j = Q_j'  A_j  Q_j\n\\f]\n\nwhere \\f$T_j\\f$ is Hermitian tridiagonal and \\f$Q_j\\f$ is a unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{n-1}} & \\: \\text{if uplo indicates lower, or}\\\\\nQ_j = H_{j_{n-1}}H_{j_{n-2}}\\cdots H_{j_1} & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{tau}_j[i] \\cdot v_{j_i}  v_{j_i}'\n\\f]\n\nwhere \\f$\\text{tau}_j[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the hermitian matrix A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrices A_j.\n@param[inout]\nA           array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_j; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(j_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_j; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(j_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A_j.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of T_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of T_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors tau_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector tau_j to the next one tau_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch.\n"]
+    #[doc = " @{\n\\brief HETD2_BATCHED computes the tridiagonal form of a batch of complex hermitian matrices A_l.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_l\\f$ is given by:\n\n\\f[\nT_l^{} = Q_l'  A_l^{}  Q_l^{}\n\\f]\n\nwhere \\f$T_l\\f$ is Hermitian tridiagonal and \\f$Q_l\\f$ is a unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ_l = H_l(n-1)H_l(n-2)\\cdots H_l(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{tau}_l[i] \\cdot v_{l_i}^{}  v_{l_i}'\n\\f]\n\nwhere \\f$\\text{tau}_l[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the hermitian matrix A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrices A_l.\n@param[inout]\nA           array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_l; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(l_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_l; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(l_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A_l.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of T_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of T_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors tau_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector tau_l to the next one tau_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch.\n"]
     pub fn rocsolver_chetd2_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7337,7 +7356,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTD2_STRIDED_BATCHED computes the tridiagonal form of a batch of real symmetric matrices A_j.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_j\\f$ is given by:\n\n\\f[\nT_j = Q_j'  A_j  Q_j\n\\f]\n\nwhere \\f$T_j\\f$ is symmetric tridiagonal and \\f$Q_j\\f$ is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{n-1}} & \\: \\text{if uplo indicates lower, or}\\\\\nQ_j = H_{j_{n-1}}H_{j_{n-2}}\\cdots H_{j_1} & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{tau}_j[i] \\cdot v_{j_i}  v_{j_i}'\n\\f]\n\nwhere \\f$\\text{tau}_j[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrices A_j.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_j; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(j_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_j; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(j_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of T_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of T_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors tau_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector tau_j to the next one tau_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYTD2_STRIDED_BATCHED computes the tridiagonal form of a batch of real symmetric matrices A_l.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_l\\f$ is given by:\n\n\\f[\nT_l^{} = Q_l'  A_l^{}  Q_l^{}\n\\f]\n\nwhere \\f$T_l\\f$ is symmetric tridiagonal and \\f$Q_l\\f$ is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ_l = H_l(n-1)H_l(n-2)\\cdots H_l(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{tau}_l[i] \\cdot v_{l_i}^{}  v_{l_i}'\n\\f]\n\nwhere \\f$\\text{tau}_l[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrices A_l.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_l; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(l_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_l; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(l_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of T_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of T_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors tau_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector tau_l to the next one tau_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssytd2_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7374,7 +7393,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HETD2_STRIDED_BATCHED computes the tridiagonal form of a batch of complex hermitian matrices A_j.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_j\\f$ is given by:\n\n\\f[\nT_j = Q_j'  A_j  Q_j\n\\f]\n\nwhere \\f$T_j\\f$ is Hermitian tridiagonal and \\f$Q_j\\f$ is a unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{n-1}} & \\: \\text{if uplo indicates lower, or}\\\\\nQ_j = H_{j_{n-1}}H_{j_{n-2}}\\cdots H_{j_1} & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{tau}_j[i] \\cdot v_{j_i}  v_{j_i}'\n\\f]\n\nwhere \\f$\\text{tau}_j[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the hermitian matrix A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrices A_j.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_j; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(j_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_j; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(j_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of T_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of T_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors tau_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector tau_j to the next one tau_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HETD2_STRIDED_BATCHED computes the tridiagonal form of a batch of complex hermitian matrices A_l.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_l\\f$ is given by:\n\n\\f[\nT_l^{} = Q_l'  A_l^{}  Q_l^{}\n\\f]\n\nwhere \\f$T_l\\f$ is Hermitian tridiagonal and \\f$Q_l\\f$ is a unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ_l = H_l(n-1)H_l(n-2)\\cdots H_l(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{tau}_l[i] \\cdot v_{l_i}^{}  v_{l_i}'\n\\f]\n\nwhere \\f$\\text{tau}_l[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the hermitian matrix A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrices A_l.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_l; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(l_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_l; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(l_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of T_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of T_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors tau_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector tau_l to the next one tau_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chetd2_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7411,7 +7430,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTRD computes the tridiagonal form of a real symmetric matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form is given by:\n\n\\f[\nT = Q'  A  Q\n\\f]\n\nwhere T is symmetric tridiagonal and Q is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H_1H_2\\cdots H_{n-1} & \\: \\text{if uplo indicates lower, or}\\\\\nQ = H_{n-1}H_{n-2}\\cdots H_1 & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{tau}[i] \\cdot v_i  v_i'\n\\f]\n\nwhere tau[i] is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_i stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_i stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A.\n@param[out]\nD           pointer to type. 
Array on the GPU of dimension n.\\n\nThe diagonal elements of T.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n-1.\\n\nThe off-diagonal elements of T.\n@param[out]\ntau         pointer to type. Array on the GPU of dimension n-1.\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief SYTRD computes the tridiagonal form of a real symmetric matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form is given by:\n\n\\f[\nT = Q'  A  Q\n\\f]\n\nwhere T is symmetric tridiagonal and Q is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H(1)H(2)\\cdots H(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ = H(n-1)H(n-2)\\cdots H(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{tau}[i] \\cdot v_i^{}  v_i'\n\\f]\n\nwhere tau[i] is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_i stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_i stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A.\n@param[out]\nD           pointer to type. 
Array on the GPU of dimension n.\nThe diagonal elements of T.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n-1.\nThe off-diagonal elements of T.\n@param[out]\ntau         pointer to type. Array on the GPU of dimension n-1.\nThe Householder scalars."]
     pub fn rocsolver_ssytrd(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7438,7 +7457,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HETRD computes the tridiagonal form of a complex hermitian matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form is given by:\n\n\\f[\nT = Q'  A  Q\n\\f]\n\nwhere T is hermitian tridiagonal and Q is an unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H_1H_2\\cdots H_{n-1} & \\: \\text{if uplo indicates lower, or}\\\\\nQ = H_{n-1}H_{n-2}\\cdots H_1 & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_i\\f$ is given by\n\n\\f[\nH_i = I - \\text{tau}[i] \\cdot v_i  v_i'\n\\f]\n\nwhere tau[i] is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_i stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_i stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A.\n@param[out]\nD           pointer to real type. 
Array on the GPU of dimension n.\\n\nThe diagonal elements of T.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n-1.\\n\nThe off-diagonal elements of T.\n@param[out]\ntau         pointer to type. Array on the GPU of dimension n-1.\\n\nThe Householder scalars."]
+    #[doc = " @{\n\\brief HETRD computes the tridiagonal form of a complex hermitian matrix A.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form is given by:\n\n\\f[\nT = Q'  A  Q\n\\f]\n\nwhere T is hermitian tridiagonal and Q is a unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ = H(1)H(2)\\cdots H(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ = H(n-1)H(n-2)\\cdots H(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H(i)\\f$ is given by\n\n\\f[\nH(i) = I - \\text{tau}[i] \\cdot v_i^{}  v_i'\n\\f]\n\nwhere tau[i] is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_i\\f$ are zero, and \\f$v_i[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_i stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_i stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A.\n@param[out]\nD           pointer to real type. 
Array on the GPU of dimension n.\nThe diagonal elements of T.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n-1.\nThe off-diagonal elements of T.\n@param[out]\ntau         pointer to type. Array on the GPU of dimension n-1.\nThe Householder scalars."]
     pub fn rocsolver_chetrd(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7465,7 +7484,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTRD_BATCHED computes the tridiagonal form of a batch of real symmetric matrices A_j.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_j\\f$ is given by:\n\n\\f[\nT_j = Q_j'  A_j  Q_j\n\\f]\n\nwhere \\f$T_j\\f$ is symmetric tridiagonal and \\f$Q_j\\f$ is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{n-1}} & \\: \\text{if uplo indicates lower, or}\\\\\nQ_j = H_{j_{n-1}}H_{j_{n-2}}\\cdots H_{j_1} & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{tau}_j[i] \\cdot v_{j_i}  v_{j_i}'\n\\f]\n\nwhere \\f$\\text{tau}_j[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrices A_j.\n@param[inout]\nA           array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_j; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(j_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_j; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(j_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A_j.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of T_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of T_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors tau_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector tau_j to the next one tau_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYTRD_BATCHED computes the tridiagonal form of a batch of real symmetric matrices A_l.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_l\\f$ is given by:\n\n\\f[\nT_l^{} = Q_l'  A_l^{}  Q_l^{}\n\\f]\n\nwhere \\f$T_l\\f$ is symmetric tridiagonal and \\f$Q_l\\f$ is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ_l = H_l(n-1)H_l(n-2)\\cdots H_l(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{tau}_l[i] \\cdot v_{l_i}^{}  v_{l_i}'\n\\f]\n\nwhere \\f$\\text{tau}_l[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrices A_l.\n@param[inout]\nA           array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_l; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(l_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_l; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(l_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A_l.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of T_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of T_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors tau_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector tau_l to the next one tau_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssytrd_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7500,7 +7519,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HETRD_BATCHED computes the tridiagonal form of a batch of complex hermitian matrices A_j.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_j\\f$ is given by:\n\n\\f[\nT_j = Q_j'  A_j  Q_j\n\\f]\n\nwhere \\f$T_j\\f$ is Hermitian tridiagonal and \\f$Q_j\\f$ is a unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{n-1}} & \\: \\text{if uplo indicates lower, or}\\\\\nQ_j = H_{j_{n-1}}H_{j_{n-2}}\\cdots H_{j_1} & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{tau}_j[i] \\cdot v_{j_i}  v_{j_i}'\n\\f]\n\nwhere \\f$\\text{tau}_j[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the hermitian matrix A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrices A_j.\n@param[inout]\nA           array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_j; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(j_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_j; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(j_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A_j.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of T_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE          pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of T_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors tau_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector tau_j to the next one tau_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HETRD_BATCHED computes the tridiagonal form of a batch of complex hermitian matrices A_l.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_l\\f$ is given by:\n\n\\f[\nT_l^{} = Q_l'  A_l^{}  Q_l^{}\n\\f]\n\nwhere \\f$T_l\\f$ is Hermitian tridiagonal and \\f$Q_l\\f$ is a unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ_l = H_l(n-1)H_l(n-2)\\cdots H_l(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{tau}_l[i] \\cdot v_{l_i}^{}  v_{l_i}'\n\\f]\n\nwhere \\f$\\text{tau}_l[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the hermitian matrix A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrices A_l.\n@param[inout]\nA           array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_l; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(l_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_l; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(l_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A_l.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of T_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE          pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of T_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors tau_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector tau_l to the next one tau_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chetrd_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7535,7 +7554,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTRD_STRIDED_BATCHED computes the tridiagonal form of a batch of real symmetric matrices A_j.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_j\\f$ is given by:\n\n\\f[\nT_j = Q_j'  A_j  Q_j\n\\f]\n\nwhere \\f$T_j\\f$ is symmetric tridiagonal and \\f$Q_j\\f$ is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{n-1}} & \\: \\text{if uplo indicates lower, or}\\\\\nQ_j = H_{j_{n-1}}H_{j_{n-2}}\\cdots H_{j_1} & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{tau}_j[i] \\cdot v_{j_i}  v_{j_i}'\n\\f]\n\nwhere \\f$\\text{tau}_j[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrices A_j.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_j; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(j_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_j; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(j_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of T_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of T_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors tau_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector tau_j to the next one tau_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYTRD_STRIDED_BATCHED computes the tridiagonal form of a batch of real symmetric matrices A_l.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_l\\f$ is given by:\n\n\\f[\nT_l^{} = Q_l'  A_l^{}  Q_l^{}\n\\f]\n\nwhere \\f$T_l\\f$ is symmetric tridiagonal and \\f$Q_l\\f$ is an orthogonal matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ_l = H_l(n-1)H_l(n-2)\\cdots H_l(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{tau}_l[i] \\cdot v_{l_i}^{}  v_{l_i}'\n\\f]\n\nwhere \\f$\\text{tau}_l[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrices A_l.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_l; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(l_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_l; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(l_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of T_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of T_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors tau_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector tau_l to the next one tau_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssytrd_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7572,7 +7591,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HETRD_STRIDED_BATCHED computes the tridiagonal form of a batch of complex hermitian matrices A_j.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_j\\f$ is given by:\n\n\\f[\nT_j = Q_j'  A_j  Q_j\n\\f]\n\nwhere \\f$T_j\\f$ is Hermitian tridiagonal and \\f$Q_j\\f$ is a unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_j = H_{j_1}H_{j_2}\\cdots H_{j_{n-1}} & \\: \\text{if uplo indicates lower, or}\\\\\nQ_j = H_{j_{n-1}}H_{j_{n-2}}\\cdots H_{j_1} & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_{j_i}\\f$ is given by\n\n\\f[\nH_{j_i} = I - \\text{tau}_j[i] \\cdot v_{j_i}  v_{j_i}'\n\\f]\n\nwhere \\f$\\text{tau}_j[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{j_i}\\f$ are zero, and \\f$v_{j_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the hermitian matrix A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrices A_j.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_j; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(j_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_j; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(j_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nThe leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe diagonal elements of T_j.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThe off-diagonal elements of T_j.\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\\n\nContains the vectors tau_j of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector tau_j to the next one tau_(j+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HETRD_STRIDED_BATCHED computes the tridiagonal form of a batch of complex hermitian matrices A_l.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe tridiagonal form of \\f$A_l\\f$ is given by:\n\n\\f[\nT_l^{} = Q_l'  A_l^{}  Q_l^{}\n\\f]\n\nwhere \\f$T_l\\f$ is Hermitian tridiagonal and \\f$Q_l\\f$ is a unitary matrix represented as the product\nof Householder matrices\n\n\\f[\n\\begin{array}{cl}\nQ_l = H_l(1)H_l(2)\\cdots H_l(n-1) & \\: \\text{if uplo indicates lower, or}\\\\\nQ_l = H_l(n-1)H_l(n-2)\\cdots H_l(1) & \\: \\text{if uplo indicates upper.}\n\\end{array}\n\\f]\n\nEach Householder matrix \\f$H_l(i)\\f$ is given by\n\n\\f[\nH_l^{}(i) = I - \\text{tau}_l[i] \\cdot v_{l_i}^{}  v_{l_i}'\n\\f]\n\nwhere \\f$\\text{tau}_l[i]\\f$ is the corresponding Householder scalar. When uplo indicates lower, the first i\nelements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i+1] = 1\\f$. If uplo indicates upper,\nthe last n-i elements of the Householder vector \\f$v_{l_i}\\f$ are zero, and \\f$v_{l_i}[i] = 1\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the hermitian matrix A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrices A_l.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l to be factored.\nOn exit, if upper, then the elements on the diagonal and superdiagonal\ncontain the tridiagonal form T_l; the elements above the superdiagonal contain\nthe first i-1 elements of the Householder vectors v_(l_i) stored as columns.\nIf lower, then the elements on the diagonal and subdiagonal\ncontain the tridiagonal form T_l; the elements below the subdiagonal contain\nthe last n-i-1 elements of the Householder vectors v_(l_i) stored as columns.\n@param[in]\nlda         rocblas_int. lda >= n.\nThe leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe diagonal elements of T_l.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThe off-diagonal elements of T_l.\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n-1.\n@param[out]\ntau         pointer to type. Array on the GPU (the size depends on the value of strideP).\nContains the vectors tau_l of corresponding Householder scalars.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector tau_l to the next one tau_(l+1).\nThere is no restriction for the value\nof strideP. Normal use is strideP >= n-1.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chetrd_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -7609,7 +7628,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGS2 reduces a real symmetric-definite generalized eigenproblem to standard\nform.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU^{-T} A U^{-1}, & \\: \\text{or}\\\\\nL^{-1} A L^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix B has been factorized as either \\f$U^T U\\f$ or\n\\f$L L^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU A U^T, & \\: \\text{or}\\\\\nL^T A L,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrix A is stored, and\nwhether the factorization applied to B was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A and\nB are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the transformed matrix associated with\nthe equivalent standard eigenvalue problem.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. 
Array on the GPU of dimension ldb*n.\\n\nThe triangular factor of the matrix B, as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B."]
+    #[doc = " @{\n\\brief SYGS2 reduces a real symmetric-definite generalized eigenproblem to standard\nform.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU^{-T} A U^{-1}, & \\: \\text{or}\\\\\nL^{-1} A L^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix B has been factorized as either \\f$U^T U\\f$ or\n\\f$L L^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU A U^T, & \\: \\text{or}\\\\\nL^T A L,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrix A is stored, and\nwhether the factorization applied to B was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A and\nB are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the transformed matrix associated with\nthe equivalent standard eigenvalue problem.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\nThe triangular factor of the matrix B, as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. 
ldb >= n.\nSpecifies the leading dimension of B."]
     pub fn rocsolver_ssygs2(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7636,7 +7655,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGS2 reduces a hermitian-definite generalized eigenproblem to standard form.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU^{-H} A U^{-1}, & \\: \\text{or}\\\\\nL^{-1} A L^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix B has been factorized as either \\f$U^H U\\f$ or\n\\f$L L^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU A U^H, & \\: \\text{or}\\\\\nL^H A L,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrix A is stored, and\nwhether the factorization applied to B was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A and\nB are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the transformed matrix associated with\nthe equivalent standard eigenvalue problem.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. 
Array on the GPU of dimension ldb*n.\\n\nThe triangular factor of the matrix B, as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B."]
+    #[doc = " @{\n\\brief HEGS2 reduces a hermitian-definite generalized eigenproblem to standard form.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU^{-H} A U^{-1}, & \\: \\text{or}\\\\\nL^{-1} A L^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix B has been factorized as either \\f$U^H U\\f$ or\n\\f$L L^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU A U^H, & \\: \\text{or}\\\\\nL^H A L,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrix A is stored, and\nwhether the factorization applied to B was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A and\nB are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the transformed matrix associated with\nthe equivalent standard eigenvalue problem.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\nThe triangular factor of the matrix B, as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. 
ldb >= n.\nSpecifies the leading dimension of B."]
     pub fn rocsolver_chegs2(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7663,7 +7682,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGS2_BATCHED reduces a batch of real symmetric-definite generalized eigenproblems\nto standard form.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_j\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j^{-T} A_j U_j^{-1}, & \\: \\text{or}\\\\\nL_j^{-1} A_j L_j^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix \\f$B_j\\f$ has been factorized as either \\f$U_j^T U_j\\f$ or\n\\f$L_j L_j^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j A_j U_j^T, & \\: \\text{or}\\\\\nL_j^T A_j L_j,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored, and\nwhether the factorization applied to B_j was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_j and\nB_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nThe triangular factors of the matrices B_j, as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGS2_BATCHED reduces a batch of real symmetric-definite generalized eigenproblems\nto standard form.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_l\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{-T} A_l^{} U_l^{-1}, & \\: \\text{or}\\\\\nL_l^{-1} A_l^{} L_l^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix \\f$B_l\\f$ has been factorized as either \\f$U_l^T U_l^{}\\f$ or\n\\f$L_l^{} L_l^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{} A_l^{} U_l^T, & \\: \\text{or}\\\\\nL_l^T A_l^{} L_l^{},\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored, and\nwhether the factorization applied to B_l was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and\nB_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nThe triangular factors of the matrices B_l, as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygs2_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7692,7 +7711,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGS2_BATCHED reduces a batch of hermitian-definite generalized eigenproblems to\nstandard form.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_j\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j^{-H} A_j U_j^{-1}, & \\: \\text{or}\\\\\nL_j^{-1} A_j L_j^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix \\f$B_j\\f$ has been factorized as either \\f$U_j^H U_j\\f$ or\n\\f$L_j L_j^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j A_j U_j^H, & \\: \\text{or}\\\\\nL_j^H A_j L_j,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored, and\nwhether the factorization applied to B_j was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_j and\nB_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nThe triangular factors of the matrices B_j, as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGS2_BATCHED reduces a batch of hermitian-definite generalized eigenproblems to\nstandard form.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_l\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{-H} A_l^{} U_l^{-1}, & \\: \\text{or}\\\\\nL_l^{-1} A_l^{} L_l^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix \\f$B_l\\f$ has been factorized as either \\f$U_l^H U_l^{}\\f$ or\n\\f$L_l^{} L_l^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{} A_l^{} U_l^H, & \\: \\text{or}\\\\\nL_l^H A_l^{} L_l^{},\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored, and\nwhether the factorization applied to B_l was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and\nB_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nThe triangular factors of the matrices B_l, as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegs2_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7721,7 +7740,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGS2_STRIDED_BATCHED reduces a batch of real symmetric-definite generalized\neigenproblems to standard form.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_j\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j^{-T} A_j U_j^{-1}, & \\: \\text{or}\\\\\nL_j^{-1} A_j L_j^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix \\f$B_j\\f$ has been factorized as either \\f$U_j^T U_j\\f$ or\n\\f$L_j L_j^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j A_j U_j^T, & \\: \\text{or}\\\\\nL_j^T A_j L_j,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored, and\nwhether the factorization applied to B_j was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_j and\nB_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nThe triangular factors of the matrices B_j, as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*n.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGS2_STRIDED_BATCHED reduces a batch of real symmetric-definite generalized\neigenproblems to standard form.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_l\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{-T} A_l^{} U_l^{-1}, & \\: \\text{or}\\\\\nL_l^{-1} A_l^{} L_l^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix \\f$B_l\\f$ has been factorized as either \\f$U_l^T U_l^{}\\f$ or\n\\f$L_l^{} L_l^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{} A_l^{} U_l^T, & \\: \\text{or}\\\\\nL_l^T A_l^{} L_l^{},\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored, and\nwhether the factorization applied to B_l was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and\nB_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nThe triangular factors of the matrices B_l, as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*n.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygs2_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7754,7 +7773,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGS2_STRIDED_BATCHED reduces a batch of hermitian-definite generalized\neigenproblems to standard form.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_j\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j^{-H} A_j U_j^{-1}, & \\: \\text{or}\\\\\nL_j^{-1} A_j L_j^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix \\f$B_j\\f$ has been factorized as either \\f$U_j^H U_j\\f$ or\n\\f$L_j L_j^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j A_j U_j^H, & \\: \\text{or}\\\\\nL_j^H A_j L_j,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored, and\nwhether the factorization applied to B_j was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_j and\nB_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nThe triangular factors of the matrices B_j, as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*n.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGS2_STRIDED_BATCHED reduces a batch of hermitian-definite generalized\neigenproblems to standard form.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_l\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{-H} A_l^{} U_l^{-1}, & \\: \\text{or}\\\\\nL_l^{-1} A_l^{} L_l^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix \\f$B_l\\f$ has been factorized as either \\f$U_l^H U_l^{}\\f$ or\n\\f$L_l^{} L_l^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{} A_l^{} U_l^H, & \\: \\text{or}\\\\\nL_l^H A_l^{} L_l^{},\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored, and\nwhether the factorization applied to B_l was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and\nB_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nThe triangular factors of the matrices B_l, as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*n.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegs2_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7787,7 +7806,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGST reduces a real symmetric-definite generalized eigenproblem to standard\nform.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU^{-T} A U^{-1}, & \\: \\text{or}\\\\\nL^{-1} A L^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix B has been factorized as either \\f$U^T U\\f$ or\n\\f$L L^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU A U^T, & \\: \\text{or}\\\\\nL^T A L,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrix A is stored, and\nwhether the factorization applied to B was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A and\nB are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the transformed matrix associated with\nthe equivalent standard eigenvalue problem.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. 
Array on the GPU of dimension ldb*n.\\n\nThe triangular factor of the matrix B, as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B."]
+    #[doc = " @{\n\\brief SYGST reduces a real symmetric-definite generalized eigenproblem to standard\nform.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU^{-T} A U^{-1}, & \\: \\text{or}\\\\\nL^{-1} A L^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix B has been factorized as either \\f$U^T U\\f$ or\n\\f$L L^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU A U^T, & \\: \\text{or}\\\\\nL^T A L,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrix A is stored, and\nwhether the factorization applied to B was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A and\nB are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the transformed matrix associated with\nthe equivalent standard eigenvalue problem.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\nThe triangular factor of the matrix B, as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. 
ldb >= n.\nSpecifies the leading dimension of B."]
     pub fn rocsolver_ssygst(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7814,7 +7833,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGST reduces a hermitian-definite generalized eigenproblem to standard form.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU^{-H} A U^{-1}, & \\: \\text{or}\\\\\nL^{-1} A L^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix B has been factorized as either \\f$U^H U\\f$ or\n\\f$L L^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU A U^H, & \\: \\text{or}\\\\\nL^H A L,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrix A is stored, and\nwhether the factorization applied to B was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A and\nB are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the transformed matrix associated with\nthe equivalent standard eigenvalue problem.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. 
Array on the GPU of dimension ldb*n.\\n\nThe triangular factor of the matrix B, as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B."]
+    #[doc = " @{\n\\brief HEGST reduces a hermitian-definite generalized eigenproblem to standard form.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU^{-H} A U^{-1}, & \\: \\text{or}\\\\\nL^{-1} A L^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix B has been factorized as either \\f$U^H U\\f$ or\n\\f$L L^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU A U^H, & \\: \\text{or}\\\\\nL^H A L,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrix A is stored, and\nwhether the factorization applied to B was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A and\nB are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the transformed matrix associated with\nthe equivalent standard eigenvalue problem.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\nThe triangular factor of the matrix B, as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. 
ldb >= n.\nSpecifies the leading dimension of B."]
     pub fn rocsolver_chegst(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7841,7 +7860,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGST_BATCHED reduces a batch of real symmetric-definite generalized eigenproblems\nto standard form.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_j\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j^{-T} A_j U_j^{-1}, & \\: \\text{or}\\\\\nL_j^{-1} A_j L_j^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix \\f$B_j\\f$ has been factorized as either \\f$U_j^T U_j\\f$ or\n\\f$L_j L_j^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j A_j U_j^T, & \\: \\text{or}\\\\\nL_j^T A_j L_j,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored, and\nwhether the factorization applied to B_j was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_j and\nB_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nThe triangular factors of the matrices B_j, as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGST_BATCHED reduces a batch of real symmetric-definite generalized eigenproblems\nto standard form.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_l\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{-T} A_l^{} U_l^{-1}, & \\: \\text{or}\\\\\nL_l^{-1} A_l^{} L_l^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix \\f$B_l\\f$ has been factorized as either \\f$U_l^T U_l^{}\\f$ or\n\\f$L_l^{} L_l^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{} A_l^{} U_l^T, & \\: \\text{or}\\\\\nL_l^T A_l^{} L_l^{},\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored, and\nwhether the factorization applied to B_l was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and\nB_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nThe triangular factors of the matrices B_l, as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygst_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7870,7 +7889,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGST_BATCHED reduces a batch of hermitian-definite generalized eigenproblems to\nstandard form.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_j\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j^{-H} A_j U_j^{-1}, & \\: \\text{or}\\\\\nL_j^{-1} A_j L_j^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix \\f$B_j\\f$ has been factorized as either \\f$U_j^H U_j\\f$ or\n\\f$L_j L_j^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j A_j U_j^H, & \\: \\text{or}\\\\\nL_j^H A_j L_j,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored, and\nwhether the factorization applied to B_j was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_j and\nB_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nThe triangular factors of the matrices B_j, as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGST_BATCHED reduces a batch of hermitian-definite generalized eigenproblems to\nstandard form.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_l\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{-H} A_l^{} U_l^{-1}, & \\: \\text{or}\\\\\nL_l^{-1} A_l^{} L_l^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix \\f$B_l\\f$ has been factorized as either \\f$U_l^H U_l^{}\\f$ or\n\\f$L_l^{} L_l^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{} A_l^{} U_l^H, & \\: \\text{or}\\\\\nL_l^H A_l^{} L_l^{},\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored, and\nwhether the factorization applied to B_l was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and\nB_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nThe triangular factors of the matrices B_l, as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegst_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7899,7 +7918,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGST_STRIDED_BATCHED reduces a batch of real symmetric-definite generalized\neigenproblems to standard form.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_j\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j^{-T} A_j U_j^{-1}, & \\: \\text{or}\\\\\nL_j^{-1} A_j L_j^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix \\f$B_j\\f$ has been factorized as either \\f$U_j^T U_j\\f$ or\n\\f$L_j L_j^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j A_j U_j^T, & \\: \\text{or}\\\\\nL_j^T A_j L_j,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored, and\nwhether the factorization applied to B_j was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_j and\nB_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nThe triangular factors of the matrices B_j, as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*n.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGST_STRIDED_BATCHED reduces a batch of real symmetric-definite generalized\neigenproblems to standard form.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_l\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{-T} A_l^{} U_l^{-1}, & \\: \\text{or}\\\\\nL_l^{-1} A_l^{} L_l^{-T},\n\\end{array}\n\\f]\n\nwhere the symmetric-definite matrix \\f$B_l\\f$ has been factorized as either \\f$U_l^T U_l^{}\\f$ or\n\\f$L_l^{} L_l^T\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{} A_l^{} U_l^T, & \\: \\text{or}\\\\\nL_l^T A_l^{} L_l^{},\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored, and\nwhether the factorization applied to B_l was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and\nB_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nThe triangular factors of the matrices B_l, as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*n.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygst_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7932,7 +7951,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGST_STRIDED_BATCHED reduces a batch of hermitian-definite generalized\neigenproblems to standard form.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_j\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j^{-H} A_j U_j^{-1}, & \\: \\text{or}\\\\\nL_j^{-1} A_j L_j^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix \\f$B_j\\f$ has been factorized as either \\f$U_j^H U_j\\f$ or\n\\f$L_j L_j^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_j A_j U_j^H, & \\: \\text{or}\\\\\nL_j^H A_j L_j,\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored, and\nwhether the factorization applied to B_j was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_j and\nB_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nThe triangular factors of the matrices B_j, as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*n.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGST_STRIDED_BATCHED reduces a batch of hermitian-definite generalized\neigenproblems to standard form.\n\n\\details\n(This is the blocked version of the algorithm).\n\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype.\n\nIf the problem is of the 1st form, then \\f$A_l\\f$ is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{-H} A_l^{} U_l^{-1}, & \\: \\text{or}\\\\\nL_l^{-1} A_l^{} L_l^{-H},\n\\end{array}\n\\f]\n\nwhere the hermitian-definite matrix \\f$B_l\\f$ has been factorized as either \\f$U_l^H U_l^{}\\f$ or\n\\f$L_l^{} L_l^H\\f$ as returned by \\ref rocsolver_spotrf \"POTRF\", depending on the value of uplo.\n\nIf the problem is of the 2nd or 3rd form, then A is overwritten with\n\n\\f[\n\\begin{array}{cl}\nU_l^{} A_l^{} U_l^H, & \\: \\text{or}\\\\\nL_l^H A_l^{} L_l^{},\n\\end{array}\n\\f]\n\nalso depending on the value of uplo.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored, and\nwhether the factorization applied to B_l was upper or lower triangular.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and\nB_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the transformed matrices associated with\nthe equivalent standard eigenvalue problems.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nThe triangular factors of the matrices B_l, as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use case is strideB >= ldb*n.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegst_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -7965,7 +7984,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEV computes the eigenvalues and optionally the eigenvectors of a real symmetric\nmatrix A.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[out]\nD           pointer to type. Array on the GPU of dimension n.\\n\nThe eigenvalues of A in increasing order.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n.\\n\nThis array is used to work internally with the tridiagonal matrix T associated with A.\nOn exit, if info > 0, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues of A (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit. If info = i > 0, the algorithm did not converge.\ni elements of E did not converge to zero."]
+    #[doc = " @{\n\\brief SYEV computes the eigenvalues and optionally the eigenvectors of a real symmetric\nmatrix A.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrix A.\n@param[out]\nD           pointer to type. Array on the GPU of dimension n.\nThe eigenvalues of A in increasing order.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n.\nThis array is used to work internally with the tridiagonal matrix T associated with A.\nOn exit, if info > 0, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues of A (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit. If info = i > 0, the algorithm did not converge.\ni elements of E did not converge to zero."]
     pub fn rocsolver_ssyev(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -7994,7 +8013,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEV computes the eigenvalues and optionally the eigenvectors of a Hermitian matrix A.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nThe eigenvalues of A in increasing order.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n.\\n\nThis array is used to work internally with the tridiagonal matrix T associated with A.\nOn exit, if info > 0, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues of A (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit. If info = i > 0, the algorithm did not converge.\ni elements of E did not converge to zero."]
+    #[doc = " @{\n\\brief HEEV computes the eigenvalues and optionally the eigenvectors of a Hermitian matrix A.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the Hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrix A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension n.\nThe eigenvalues of A in increasing order.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n.\nThis array is used to work internally with the tridiagonal matrix T associated with A.\nOn exit, if info > 0, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues of A (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit. If info = i > 0, the algorithm did not converge.\ni elements of E did not converge to zero."]
     pub fn rocsolver_cheev(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8023,7 +8042,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEV_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_j.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with A_j.\nOn exit, if info[j] > 0, E_j contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). 
The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues of A_j (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j. If info[j] = i > 0, the algorithm did not converge.\ni elements of E_j did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYEV_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_l.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with A_l.\nOn exit, if info[l] > 0, E_l contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). 
The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues of A_l (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l. If info[l] = i > 0, the algorithm did not converge.\ni elements of E_l did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssyev_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8058,7 +8077,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEV_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nHermitian matrices A_j.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with A_j.\nOn exit, if info[j] > 0, E_j contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). 
The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues of A_j (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j. If info[j] = i > 0, the algorithm did not converge.\ni elements of E_j did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEEV_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nHermitian matrices A_l.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the Hermitian matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with A_l.\nOn exit, if info[l] > 0, E_l contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). 
The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues of A_l (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l. If info[l] = i > 0, the algorithm did not converge.\ni elements of E_l did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_cheev_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8093,7 +8112,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEV_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_j.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. 
Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with A_j.\nOn exit, if info[j] > 0, E_j contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues of A_j (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j. If info[j] = i > 0, the algorithm did not converge.\ni elements of E_j did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYEV_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_l.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. 
Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with A_l.\nOn exit, if info[l] > 0, E_l contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues of A_l (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l. If info[l] = i > 0, the algorithm did not converge.\ni elements of E_l did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssyev_strided_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8130,7 +8149,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEV_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nHermitian matrices A_j.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. 
Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with A_j.\nOn exit, if info[j] > 0, E_j contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues of A_j (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j. If info[j] = i > 0, the algorithm did not converge.\ni elements of E_j did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEEV_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nHermitian matrices A_l.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed depending\non the value of evect. The computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the Hermitian matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. 
Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with A_l.\nOn exit, if info[l] > 0, E_l contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues of A_l (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l. If info[l] = i > 0, the algorithm did not converge.\ni elements of E_l did not converge to zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_cheev_strided_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8167,7 +8186,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEVD computes the eigenvalues and optionally the eigenvectors of a real symmetric\nmatrix A.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[out]\nD           pointer to type. Array on the GPU of dimension n.\\n\nThe eigenvalues of A in increasing order.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n.\\n\nThis array is used to work internally with the tridiagonal matrix T associated with A.\nOn exit, if info > 0, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). 
The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues of A (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E did not converge to zero.\nIf info = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)]."]
+    #[doc = " @{\n\\brief SYEVD computes the eigenvalues and optionally the eigenvectors of a real symmetric\nmatrix A.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrix A.\n@param[out]\nD           pointer to type. Array on the GPU of dimension n.\nThe eigenvalues of A in increasing order.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n.\nThis array is used to work internally with the tridiagonal matrix T associated with A.\nOn exit, if info > 0, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). 
The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues of A (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E did not converge to zero.\nIf info = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)]."]
     pub fn rocsolver_ssyevd(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8196,7 +8215,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEVD computes the eigenvalues and optionally the eigenvectors of a Hermitian matrix A.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nThe eigenvalues of A in increasing order.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n.\\n\nThis array is used to work internally with the tridiagonal matrix T associated with A.\nOn exit, if info > 0, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). 
The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues of A (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E did not converge to zero.\nIf info = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)]."]
+    #[doc = " @{\n\\brief HEEVD computes the eigenvalues and optionally the eigenvectors of a Hermitian matrix A.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the Hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrix A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension n.\nThe eigenvalues of A in increasing order.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n.\nThis array is used to work internally with the tridiagonal matrix T associated with A.\nOn exit, if info > 0, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). 
The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues of A (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E did not converge to zero.\nIf info = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)]."]
     pub fn rocsolver_cheevd(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8225,7 +8244,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEVD_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_j.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with A_j.\nOn exit, if info[j] > 0, E_j contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). 
The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues of A_j (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j.\nIf info[j] = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E_j did not converge to zero.\nIf info[j] = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYEVD_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_l.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with A_l.\nOn exit, if info[l] > 0, E_l contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). 
The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues of A_l (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l.\nIf info[l] = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E_l did not converge to zero.\nIf info[l] = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssyevd_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8260,7 +8279,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEVD_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nHermitian matrices A_j.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. 
Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with A_j.\nOn exit, if info[j] > 0, E_j contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues of A_j (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j.\nIf info[j] = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E_j did not converge to zero.\nIf info[j] = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEEVD_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nHermitian matrices A_l.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the Hermitian matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with A_l.\nOn exit, if info[l] > 0, E_l contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). 
The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues of A_l (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l.\nIf info[l] = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E_l did not converge to zero.\nIf info[l] = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_cheevd_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8295,7 +8314,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEVD_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_j.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. 
Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with A_j.\nOn exit, if info[j] > 0, E_j contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues of A_j (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j.\nIf info[j] = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E_j did not converge to zero.\nIf info[j] = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYEVD_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_l.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to type. 
Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with A_l.\nOn exit, if info[l] > 0, E_l contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues of A_l (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l.\nIf info[l] = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E_l did not converge to zero.\nIf info[l] = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssyevd_strided_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8332,7 +8351,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEVD_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nHermitian matrices A_j.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. 
Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with A_j.\nOn exit, if info[j] > 0, E_j contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues of A_j (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j.\nIf info[j] = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E_j did not converge to zero.\nIf info[j] = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEEVD_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nHermitian matrices A_l.\n\n\\details\nThe eigenvalues are returned in ascending order. The eigenvectors are computed using a\ndivide-and-conquer algorithm, depending on the value of evect. The computed eigenvectors\nare orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the Hermitian matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\nE           pointer to real type. 
Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with A_l.\nOn exit, if info[l] > 0, E_l contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues of A_l (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use case is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l.\nIf info[l] = i > 0 and evect is rocblas_evect_none, the algorithm did not converge.\ni elements of E_l did not converge to zero.\nIf info[l] = i > 0 and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_cheevd_strided_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8369,7 +8388,411 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEVJ computes the eigenvalues and optionally the eigenvectors of a real symmetric\nmatrix A.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A\\f$ is transformed by a product of Jacobi rotations \\f$V\\f$ as\n\n\\f[\nA^{(k)} = V' A^{(k-1)} V\n\\f]\n\nsuch that \\f$off(A^{(k)}) < off(A^{(k-1)})\\f$, where \\f$A^{(0)} = A\\f$ and \\f$off(A^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A^{(k)}\\f$. As \\f$off(A^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A\\f$.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\\n\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. 
On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are unchanged.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(A)\nis <= norm(A) * abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of A (i.e. off(A)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to a rocblas_int on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm.\n@param[out]\nW           pointer to type. Array on the GPU of dimension n.\\n\nThe eigenvalues of A in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit. If info = 1, the algorithm did not converge."]
+    #[doc = " @{\n\\brief SYEVDJ computes the eigenvalues and optionally the eigenvectors of a real symmetric\nmatrix A.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in ascending order.\nThe eigenvectors are computed using a divide-and-conquer approach depending on the value of evect.\nThe computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[out]\nD           pointer to type. Array on the GPU of dimension n.\\n\nThe eigenvalues of A in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit. If info = 1, the algorithm did not converge."]
+    pub fn rocsolver_ssyevdj(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut f32,
+        lda: rocblas_int,
+        D: *mut f32,
+        info: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_dsyevdj(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut f64,
+        lda: rocblas_int,
+        D: *mut f64,
+        info: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief HEEVDJ computes the eigenvalues and optionally the eigenvectors of a complex Hermitian\nmatrix A.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in ascending order.\nThe eigenvectors are computed using a divide-and-conquer approach depending on the value of evect.\nThe computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nThe eigenvalues of A in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit. If info = 1, the algorithm did not converge."]
+    pub fn rocsolver_cheevdj(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        D: *mut f32,
+        info: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_zheevdj(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+        D: *mut f64,
+        info: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief SYEVDJ_BATCHED computes the eigenvalues and optionally the eigenvectors of a\nbatch of real symmetric matrices A_l.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in ascending order.\nThe eigenvectors are computed using a divide-and-conquer approach depending on the value of evect.\nThe computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[l] = 0, successful exit for A_l. If info[l] = 1, the algorithm did not converge for A_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    pub fn rocsolver_ssyevdj_batched(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *const *mut f32,
+        lda: rocblas_int,
+        D: *mut f32,
+        strideD: rocblas_stride,
+        info: *mut rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_dsyevdj_batched(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *const *mut f64,
+        lda: rocblas_int,
+        D: *mut f64,
+        strideD: rocblas_stride,
+        info: *mut rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief HEEVDJ_BATCHED computes the eigenvalues and optionally the eigenvectors of a\nbatch of complex Hermitian matrices A_l.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in ascending order.\nThe eigenvectors are computed using a divide-and-conquer approach depending on the value of evect.\nThe computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[l] = 0, successful exit for A_l. If info[l] = 1, the algorithm did not converge for A_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    pub fn rocsolver_cheevdj_batched(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *const *mut rocblas_float_complex,
+        lda: rocblas_int,
+        D: *mut f32,
+        strideD: rocblas_stride,
+        info: *mut rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_zheevdj_batched(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *const *mut rocblas_double_complex,
+        lda: rocblas_int,
+        D: *mut f64,
+        strideD: rocblas_stride,
+        info: *mut rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief SYEVDJ_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a\nbatch of real symmetric matrices A_l.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in ascending order.\nThe eigenvectors are computed using a divide-and-conquer approach depending on the value of evect.\nThe computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[l] = 0, successful exit for A_l. If info[l] = 1, the algorithm did not converge for A_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    pub fn rocsolver_ssyevdj_strided_batched(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        D: *mut f32,
+        strideD: rocblas_stride,
+        info: *mut rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_dsyevdj_strided_batched(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut f64,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        D: *mut f64,
+        strideD: rocblas_stride,
+        info: *mut rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief HEEVDJ_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a\nbatch of complex Hermitian matrices A_l.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in ascending order.\nThe eigenvectors are computed using a divide-and-conquer approach depending on the value of evect.\nThe computed eigenvectors are orthonormal.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_l. On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use case is strideD >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[l] = 0, successful exit for A_l. If info[l] = 1, the algorithm did not converge for A_l.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    pub fn rocsolver_cheevdj_strided_batched(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        D: *mut f32,
+        strideD: rocblas_stride,
+        info: *mut rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_zheevdj_strided_batched(
+        handle: rocblas_handle,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        D: *mut f64,
+        strideD: rocblas_stride,
+        info: *mut rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief SYGVDJ computes the eigenvalues and (optionally) eigenvectors of\na real generalized symmetric-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative Jacobi algorithm,\nand are returned in ascending order. The eigenvectors are computed using a divide-and-conquer algorithm,\ndepending on the value of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices A and B are stored.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A and B\nare not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the normalized matrix Z of eigenvectors if they were computed\nand the algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[inout]\nB           pointer to type. 
Array on the GPU of dimension ldb*n.\\n\nOn entry, the symmetric positive definite matrix B. On exit,\nthe triangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of matrix B.\n@param[out]\nD           pointer to type. Array on the GPU of dimension n.\\n\nThe eigenvalues in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit. If info = 1, the algorithm did not converge.\nIf info = n + i, the leading minor of order i of B is not positive definite."]
+    pub fn rocsolver_ssygvdj(
+        handle: rocblas_handle,
+        itype: rocblas_eform,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut f32,
+        lda: rocblas_int,
+        B: *mut f32,
+        ldb: rocblas_int,
+        D: *mut f32,
+        info: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_dsygvdj(
+        handle: rocblas_handle,
+        itype: rocblas_eform,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut f64,
+        lda: rocblas_int,
+        B: *mut f64,
+        ldb: rocblas_int,
+        D: *mut f64,
+        info: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief HEGVDJ computes the eigenvalues and (optionally) eigenvectors of\na complex generalized Hermitian-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative Jacobi algorithm,\nand are returned in ascending order. The eigenvectors are computed using a divide-and-conquer algorithm,\ndepending on the value of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices A and B are stored.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A and B\nare not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the normalized matrix Z of eigenvectors if they were computed\nand the algorithm converged; otherwise the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[inout]\nB           pointer to type. 
Array on the GPU of dimension ldb*n.\\n\nOn entry, the Hermitian positive definite matrix B. On exit,\nthe triangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of matrix B.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nThe eigenvalues in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit. If info = 1, the algorithm did not converge.\nIf info = n + i, the leading minor of order i of B is not positive definite."]
+    pub fn rocsolver_chegvdj(
+        handle: rocblas_handle,
+        itype: rocblas_eform,
+        evect: rocblas_evect,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        B: *mut rocblas_float_complex,
+        ldb: rocblas_int,
+        D: *mut f32,
+        info: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use] // ignoring the returned rocblas_status produces a compiler warning
+    pub fn rocsolver_zhegvdj( // HEGVDJ for rocblas_double_complex matrices; has no #[doc] of its own, see the `@{` group doc on rocsolver_chegvdj
+        handle: rocblas_handle,
+        itype: rocblas_eform, // form of the generalized eigenproblem (per the HEGVDJ group doc)
+        evect: rocblas_evect, // whether eigenvectors are computed (per the HEGVDJ group doc)
+        uplo: rocblas_fill, // which triangle of A and B is stored (per the HEGVDJ group doc)
+        n: rocblas_int, // number of rows and columns of A; documented as n >= 0
+        A: *mut rocblas_double_complex, // documented as [inout] GPU array of dimension lda*n; eigenvectors on exit if computed and converged
+        lda: rocblas_int, // leading dimension of A; documented as lda >= n
+        B: *mut rocblas_double_complex, // documented as [inout] GPU array of dimension ldb*n; triangular factor of B on exit
+        ldb: rocblas_int, // leading dimension of B; documented as ldb >= n
+        D: *mut f64, // real-valued eigenvalue output; documented as a GPU array of dimension n in increasing order
+        info: *mut rocblas_int, // documented as: 0 = success, 1 = did not converge, n + i = leading minor of order i of B not positive definite
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief SYGVDJ_BATCHED computes the eigenvalues and (optionally) eigenvectors of\nbatch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative Jacobi algorithm,\nand are returned in ascending order. The eigenvectors are computed using a divide-and-conquer algorithm,\ndepending on the value of evect.\n\nWhen computed, the matrix Z_l of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T_l B_l Z_l=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T_l B^{-1}_l Z_l=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices A_l and B_l are stored.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and B_l\nare not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A_l.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_l. On exit, the normalized matrices Z_l of eigenvectors if they were computed\nand the algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of matrices A_l.\n@param[inout]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nOn entry, the symmetric positive definite matrices B_l. On exit,\nthe triangular factor of B_l as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of matrices B_l.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[l] = 0, successful exit. If info[l] = 1, the algorithm did not converge for matrix A_l.\nIf info[l] = n + i, the leading minor of order i of B_l is not positive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of eigenproblems in the batch."]
+    pub fn rocsolver_ssygvdj_batched( // SYGVDJ_BATCHED for f32 matrices
+        handle: rocblas_handle,
+        itype: rocblas_eform, // form of the generalized eigenproblems
+        evect: rocblas_evect, // whether eigenvectors are computed; rocblas_evect_tridiagonal is not supported
+        uplo: rocblas_fill, // which triangle (upper or lower) of A_l and B_l is stored
+        n: rocblas_int, // number of rows and columns of each A_l; n >= 0
+        A: *const *mut f32, // [inout] array of pointers, each to a GPU array of dimension lda*n; eigenvectors Z_l on exit if computed and converged
+        lda: rocblas_int, // leading dimension of matrices A_l; lda >= n
+        B: *const *mut f32, // [inout] array of pointers, each to a GPU array of dimension ldb*n; triangular factor of B_l on exit
+        ldb: rocblas_int, // leading dimension of matrices B_l; ldb >= n
+        D: *mut f32, // [out] GPU array of eigenvalues in increasing order; its size depends on strideD
+        strideD: rocblas_stride, // stride from the start of D_l to D_(l+1); normal use is strideD >= n
+        info: *mut rocblas_int, // [out] GPU array of batch_count integers: 0 = success, 1 = did not converge, n + i = B_l minor of order i not positive definite
+        batch_count: rocblas_int, // number of eigenproblems in the batch; batch_count >= 0
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use] // ignoring the returned rocblas_status produces a compiler warning
+    pub fn rocsolver_dsygvdj_batched( // SYGVDJ_BATCHED for f64 matrices; has no #[doc] of its own, see the `@{` group doc on rocsolver_ssygvdj_batched
+        handle: rocblas_handle,
+        itype: rocblas_eform, // form of the generalized eigenproblems (per the SYGVDJ_BATCHED group doc)
+        evect: rocblas_evect, // whether eigenvectors are computed (per the SYGVDJ_BATCHED group doc)
+        uplo: rocblas_fill, // which triangle of A_l and B_l is stored (per the SYGVDJ_BATCHED group doc)
+        n: rocblas_int, // number of rows and columns of each A_l; documented as n >= 0
+        A: *const *mut f64, // documented as [inout] array of pointers, each to a GPU array of dimension lda*n
+        lda: rocblas_int, // leading dimension of matrices A_l; documented as lda >= n
+        B: *const *mut f64, // documented as [inout] array of pointers, each to a GPU array of dimension ldb*n
+        ldb: rocblas_int, // leading dimension of matrices B_l; documented as ldb >= n
+        D: *mut f64, // eigenvalue output; documented as a GPU array whose size depends on strideD
+        strideD: rocblas_stride, // stride from the start of D_l to D_(l+1); documented normal use is strideD >= n
+        info: *mut rocblas_int, // documented as a GPU array of batch_count per-problem status integers
+        batch_count: rocblas_int, // number of eigenproblems in the batch; documented as batch_count >= 0
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief HEGVDJ_BATCHED computes the eigenvalues and (optionally) eigenvectors of\nbatch of complex generalized Hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative Jacobi algorithm,\nand are returned in ascending order. The eigenvectors are computed using a divide-and-conquer algorithm,\ndepending on the value of evect.\n\nWhen computed, the matrix Z_l of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H_l B_l Z_l=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H_l B^{-1}_l Z_l=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices A_l and B_l are stored.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and B_l\nare not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A_l.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_l. On exit, the normalized matrices Z_l of eigenvectors if they were computed\nand the algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of matrices A_l.\n@param[inout]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nOn entry, the Hermitian positive definite matrices B_l. On exit,\nthe triangular factor of B_l as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of matrices B_l.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[l] = 0, successful exit. If info[l] = 1, the algorithm did not converge for matrix A_l.\nIf info[l] = n + i, the leading minor of order i of B_l is not positive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of eigenproblems in the batch."]
+    pub fn rocsolver_chegvdj_batched( // HEGVDJ_BATCHED for rocblas_float_complex matrices; eigenvalues are returned as f32
+        handle: rocblas_handle,
+        itype: rocblas_eform, // form of the generalized eigenproblems
+        evect: rocblas_evect, // whether eigenvectors are computed; rocblas_evect_tridiagonal is not supported
+        uplo: rocblas_fill, // which triangle (upper or lower) of A_l and B_l is stored
+        n: rocblas_int, // number of rows and columns of each A_l; n >= 0
+        A: *const *mut rocblas_float_complex, // [inout] array of pointers, each to a GPU array of dimension lda*n; eigenvectors Z_l on exit if computed and converged
+        lda: rocblas_int, // leading dimension of matrices A_l; lda >= n
+        B: *const *mut rocblas_float_complex, // [inout] array of pointers, each to a GPU array of dimension ldb*n; triangular factor of B_l on exit
+        ldb: rocblas_int, // leading dimension of matrices B_l; ldb >= n
+        D: *mut f32, // [out] real-valued eigenvalues in increasing order; GPU array whose size depends on strideD
+        strideD: rocblas_stride, // stride from the start of D_l to D_(l+1); normal use is strideD >= n
+        info: *mut rocblas_int, // [out] GPU array of batch_count integers: 0 = success, 1 = did not converge, n + i = B_l minor of order i not positive definite
+        batch_count: rocblas_int, // number of eigenproblems in the batch; batch_count >= 0
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use] // ignoring the returned rocblas_status produces a compiler warning
+    pub fn rocsolver_zhegvdj_batched( // HEGVDJ_BATCHED for rocblas_double_complex matrices; has no #[doc] of its own, see the `@{` group doc on rocsolver_chegvdj_batched
+        handle: rocblas_handle,
+        itype: rocblas_eform, // form of the generalized eigenproblems (per the HEGVDJ_BATCHED group doc)
+        evect: rocblas_evect, // whether eigenvectors are computed (per the HEGVDJ_BATCHED group doc)
+        uplo: rocblas_fill, // which triangle of A_l and B_l is stored (per the HEGVDJ_BATCHED group doc)
+        n: rocblas_int, // number of rows and columns of each A_l; documented as n >= 0
+        A: *const *mut rocblas_double_complex, // documented as [inout] array of pointers, each to a GPU array of dimension lda*n
+        lda: rocblas_int, // leading dimension of matrices A_l; documented as lda >= n
+        B: *const *mut rocblas_double_complex, // documented as [inout] array of pointers, each to a GPU array of dimension ldb*n
+        ldb: rocblas_int, // leading dimension of matrices B_l; documented as ldb >= n
+        D: *mut f64, // real-valued eigenvalue output; documented as a GPU array whose size depends on strideD
+        strideD: rocblas_stride, // stride from the start of D_l to D_(l+1); documented normal use is strideD >= n
+        info: *mut rocblas_int, // documented as a GPU array of batch_count per-problem status integers
+        batch_count: rocblas_int, // number of eigenproblems in the batch; documented as batch_count >= 0
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief SYGVDJ_STRIDED_BATCHED computes the eigenvalues and (optionally) eigenvectors of\nbatch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative Jacobi algorithm,\nand are returned in ascending order. The eigenvectors are computed using a divide-and-conquer algorithm,\ndepending on the value of evect.\n\nWhen computed, the matrix Z_l of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T_l B_l Z_l=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T_l B^{-1}_l Z_l=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices A_l and B_l are stored.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and B_l\nare not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_l. On exit, the normalized matrices Z_l of eigenvectors if they were computed\nand the algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[inout]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the symmetric positive definite matrices B_l. On exit,\nthe triangular factor of B_l as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of matrices B_l.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[l] = 0, successful exit. If info[l] = 1, the algorithm did not converge for matrix A_l.\nIf info[l] = n + i, the leading minor of order i of B_l is not positive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of eigenproblems in the batch."]
+    pub fn rocsolver_ssygvdj_strided_batched( // SYGVDJ_STRIDED_BATCHED for f32 matrices
+        handle: rocblas_handle,
+        itype: rocblas_eform, // form of the generalized eigenproblems
+        evect: rocblas_evect, // whether eigenvectors are computed; rocblas_evect_tridiagonal is not supported
+        uplo: rocblas_fill, // which triangle (upper or lower) of A_l and B_l is stored
+        n: rocblas_int, // number of rows and columns of each A_l; n >= 0
+        A: *mut f32, // [inout] GPU array holding all A_l (size depends on strideA); eigenvectors Z_l on exit if computed and converged
+        lda: rocblas_int, // leading dimension of matrices A_l; lda >= n
+        strideA: rocblas_stride, // stride from the start of A_l to A_(l+1); normal use is strideA >= lda*n
+        B: *mut f32, // [inout] GPU array holding all B_l (size depends on strideB); triangular factor of B_l on exit
+        ldb: rocblas_int, // leading dimension of matrices B_l; ldb >= n
+        strideB: rocblas_stride, // stride from the start of B_l to B_(l+1); normal use is strideB >= ldb*n
+        D: *mut f32, // [out] GPU array of eigenvalues in increasing order; its size depends on strideD
+        strideD: rocblas_stride, // stride from the start of D_l to D_(l+1); normal use is strideD >= n
+        info: *mut rocblas_int, // [out] GPU array of batch_count integers: 0 = success, 1 = did not converge, n + i = B_l minor of order i not positive definite
+        batch_count: rocblas_int, // number of eigenproblems in the batch; batch_count >= 0
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use] // ignoring the returned rocblas_status produces a compiler warning
+    pub fn rocsolver_dsygvdj_strided_batched( // SYGVDJ_STRIDED_BATCHED for f64 matrices; has no #[doc] of its own, see the `@{` group doc on rocsolver_ssygvdj_strided_batched
+        handle: rocblas_handle,
+        itype: rocblas_eform, // form of the generalized eigenproblems (per the SYGVDJ_STRIDED_BATCHED group doc)
+        evect: rocblas_evect, // whether eigenvectors are computed (per the SYGVDJ_STRIDED_BATCHED group doc)
+        uplo: rocblas_fill, // which triangle of A_l and B_l is stored (per the SYGVDJ_STRIDED_BATCHED group doc)
+        n: rocblas_int, // number of rows and columns of each A_l; documented as n >= 0
+        A: *mut f64, // documented as [inout] GPU array holding all A_l; size depends on strideA
+        lda: rocblas_int, // leading dimension of matrices A_l; documented as lda >= n
+        strideA: rocblas_stride, // stride from the start of A_l to A_(l+1); documented normal use is strideA >= lda*n
+        B: *mut f64, // documented as [inout] GPU array holding all B_l; size depends on strideB
+        ldb: rocblas_int, // leading dimension of matrices B_l; documented as ldb >= n
+        strideB: rocblas_stride, // stride from the start of B_l to B_(l+1); documented normal use is strideB >= ldb*n
+        D: *mut f64, // eigenvalue output; documented as a GPU array whose size depends on strideD
+        strideD: rocblas_stride, // stride from the start of D_l to D_(l+1); documented normal use is strideD >= n
+        info: *mut rocblas_int, // documented as a GPU array of batch_count per-problem status integers
+        batch_count: rocblas_int, // number of eigenproblems in the batch; documented as batch_count >= 0
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief HEGVDJ_STRIDED_BATCHED computes the eigenvalues and (optionally) eigenvectors of\nbatch of complex generalized Hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative Jacobi algorithm,\nand are returned in ascending order. The eigenvectors are computed using a divide-and-conquer algorithm,\ndepending on the value of evect.\n\nWhen computed, the matrix Z_l of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H_l B_l Z_l=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H_l B^{-1}_l Z_l=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices A_l and B_l are stored.\nIf uplo indicates lower (or upper), then the upper (or lower) parts of A_l and B_l\nare not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_l. On exit, the normalized matrices Z_l of eigenvectors if they were computed\nand the algorithm converged; otherwise the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[inout]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the Hermitian positive definite matrices B_l. On exit,\nthe triangular factor of B_l as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of matrices B_l.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nThe eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[l] = 0, successful exit. If info[l] = 1, the algorithm did not converge for matrix A_l.\nIf info[l] = n + i, the leading minor of order i of B_l is not positive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of eigenproblems in the batch."]
+    pub fn rocsolver_chegvdj_strided_batched( // HEGVDJ_STRIDED_BATCHED for rocblas_float_complex matrices; eigenvalues are returned as f32
+        handle: rocblas_handle,
+        itype: rocblas_eform, // form of the generalized eigenproblems
+        evect: rocblas_evect, // whether eigenvectors are computed; rocblas_evect_tridiagonal is not supported
+        uplo: rocblas_fill, // which triangle (upper or lower) of A_l and B_l is stored
+        n: rocblas_int, // number of rows and columns of each A_l; n >= 0
+        A: *mut rocblas_float_complex, // [inout] GPU array holding all A_l (size depends on strideA); eigenvectors Z_l on exit if computed and converged
+        lda: rocblas_int, // leading dimension of matrices A_l; lda >= n
+        strideA: rocblas_stride, // stride from the start of A_l to A_(l+1); normal use is strideA >= lda*n
+        B: *mut rocblas_float_complex, // [inout] GPU array holding all B_l (size depends on strideB); triangular factor of B_l on exit
+        ldb: rocblas_int, // leading dimension of matrices B_l; ldb >= n
+        strideB: rocblas_stride, // stride from the start of B_l to B_(l+1); normal use is strideB >= ldb*n
+        D: *mut f32, // [out] real-valued eigenvalues in increasing order; GPU array whose size depends on strideD
+        strideD: rocblas_stride, // stride from the start of D_l to D_(l+1); normal use is strideD >= n
+        info: *mut rocblas_int, // [out] GPU array of batch_count integers: 0 = success, 1 = did not converge, n + i = B_l minor of order i not positive definite
+        batch_count: rocblas_int, // number of eigenproblems in the batch; batch_count >= 0
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use] // ignoring the returned rocblas_status produces a compiler warning
+    pub fn rocsolver_zhegvdj_strided_batched( // HEGVDJ_STRIDED_BATCHED for rocblas_double_complex matrices; has no #[doc] of its own, see the `@{` group doc on rocsolver_chegvdj_strided_batched
+        handle: rocblas_handle,
+        itype: rocblas_eform, // form of the generalized eigenproblems (per the HEGVDJ_STRIDED_BATCHED group doc)
+        evect: rocblas_evect, // whether eigenvectors are computed (per the HEGVDJ_STRIDED_BATCHED group doc)
+        uplo: rocblas_fill, // which triangle of A_l and B_l is stored (per the HEGVDJ_STRIDED_BATCHED group doc)
+        n: rocblas_int, // number of rows and columns of each A_l; documented as n >= 0
+        A: *mut rocblas_double_complex, // documented as [inout] GPU array holding all A_l; size depends on strideA
+        lda: rocblas_int, // leading dimension of matrices A_l; documented as lda >= n
+        strideA: rocblas_stride, // stride from the start of A_l to A_(l+1); documented normal use is strideA >= lda*n
+        B: *mut rocblas_double_complex, // documented as [inout] GPU array holding all B_l; size depends on strideB
+        ldb: rocblas_int, // leading dimension of matrices B_l; documented as ldb >= n
+        strideB: rocblas_stride, // stride from the start of B_l to B_(l+1); documented normal use is strideB >= ldb*n
+        D: *mut f64, // real-valued eigenvalue output; documented as a GPU array whose size depends on strideD
+        strideD: rocblas_stride, // stride from the start of D_l to D_(l+1); documented normal use is strideD >= n
+        info: *mut rocblas_int, // documented as a GPU array of batch_count per-problem status integers
+        batch_count: rocblas_int, // number of eigenproblems in the batch; documented as batch_count >= 0
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief SYEVJ computes the eigenvalues and optionally the eigenvectors of a real symmetric\nmatrix A.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A\\f$ is transformed by a product of Jacobi rotations \\f$V\\f$ as\n\n\\f[\nA^{(k)} = V' A^{(k-1)} V\n\\f]\n\nsuch that \\f$off(A^{(k)}) < off(A^{(k-1)})\\f$, where \\f$A^{(0)} = A\\f$ and \\f$off(A^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A^{(k)}\\f$. As \\f$off(A^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are unchanged.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of matrix A.\n@param[in]\nabstol      type.\nThe absolute tolerance. The algorithm is considered to have converged once off(A)\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type on the GPU.\nThe Frobenius norm of the off-diagonal elements of A (i.e. off(A)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to a rocblas_int on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm.\n@param[out]\nW           pointer to type. Array on the GPU of dimension n.\nThe eigenvalues of A in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit. If info = 1, the algorithm did not converge."]
     pub fn rocsolver_ssyevj(
         handle: rocblas_handle,
         esort: rocblas_esort,
@@ -8406,7 +8829,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEVJ computes the eigenvalues and optionally the eigenvectors of a complex Hermitian\nmatrix A.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A\\f$ is transformed by a product of Jacobi rotations \\f$V\\f$ as\n\n\\f[\nA^{(k)} = V' A^{(k-1)} V\n\\f]\n\nsuch that \\f$off(A^{(k)}) < off(A^{(k-1)})\\f$, where \\f$A^{(0)} = A\\f$ and \\f$off(A^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A^{(k)}\\f$. As \\f$off(A^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A\\f$.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\\n\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. 
On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are unchanged.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(A)\nis <= norm(A) * abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of A (i.e. off(A)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to a rocblas_int on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm.\n@param[out]\nW           pointer to real type. Array on the GPU of dimension n.\\n\nThe eigenvalues of A in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit. If info = 1, the algorithm did not converge."]
+    #[doc = " @{\n\\brief HEEVJ computes the eigenvalues and optionally the eigenvectors of a complex Hermitian\nmatrix A.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A\\f$ is transformed by a product of Jacobi rotations \\f$V\\f$ as\n\n\\f[\nA^{(k)} = V' A^{(k-1)} V\n\\f]\n\nsuch that \\f$off(A^{(k)}) < off(A^{(k-1)})\\f$, where \\f$A^{(0)} = A\\f$ and \\f$off(A^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A^{(k)}\\f$. As \\f$off(A^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the Hermitian matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the eigenvectors of A if they were computed and\nthe algorithm converged; otherwise the contents of A are unchanged.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of matrix A.\n@param[in]\nabstol      real type.\nThe absolute tolerance. The algorithm is considered to have converged once off(A)\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type on the GPU.\nThe Frobenius norm of the off-diagonal elements of A (i.e. off(A)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to a rocblas_int on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm.\n@param[out]\nW           pointer to real type. Array on the GPU of dimension n.\nThe eigenvalues of A in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit. If info = 1, the algorithm did not converge."]
     pub fn rocsolver_cheevj(
         handle: rocblas_handle,
         esort: rocblas_esort,
@@ -8443,7 +8866,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEVJ_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_j.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A_j\\f$ is transformed by a product of Jacobi rotations \\f$V_j\\f$ as\n\n\\f[\nA_j^{(k)} = V_j' A_j^{(k-1)} V_j\n\\f]\n\nsuch that \\f$off(A_j^{(k)}) < off(A_j^{(k-1)})\\f$, where \\f$A_j^{(0)} = A_j\\f$ and \\f$off(A_j^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A_j^{(k)}\\f$. As \\f$off(A_j^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A_j^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A_j\\f$.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\\n\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are unchanged.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(A_j)\nis <= norm(A_j) * abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type. Array of batch_count scalars on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of A_j (i.e. off(A_j)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j. If info[j] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYEVJ_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_l.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A_l\\f$ is transformed by a product of Jacobi rotations \\f$V_l\\f$ as\n\n\\f[\nA_l^{(k)} = V_l' A_l^{(k-1)} V_l^{}\n\\f]\n\nsuch that \\f$off(A_l^{(k)}) < off(A_l^{(k-1)})\\f$, where \\f$A_l^{(0)} = A_l\\f$ and \\f$off(A_l^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A_l^{(k)}\\f$. As \\f$off(A_l^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A_l^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A_l\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. 
On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are unchanged.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nabstol      type.\nThe absolute tolerance. The algorithm is considered to have converged once off(A_l)\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type. Array of batch_count scalars on the GPU.\nThe Frobenius norm of the off-diagonal elements of A_l (i.e. off(A_l)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l. If info[l] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssyevj_batched(
         handle: rocblas_handle,
         esort: rocblas_esort,
@@ -8484,7 +8907,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEVJ_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\ncomplex Hermitian matrices A_j.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A_j\\f$ is transformed by a product of Jacobi rotations \\f$V_j\\f$ as\n\n\\f[\nA_j^{(k)} = V_j' A_j^{(k-1)} V_j\n\\f]\n\nsuch that \\f$off(A_j^{(k)}) < off(A_j^{(k-1)})\\f$, where \\f$A_j^{(0)} = A_j\\f$ and \\f$off(A_j^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A_j^{(k)}\\f$. As \\f$off(A_j^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A_j^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A_j\\f$.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\\n\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are unchanged.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(A_j)\nis <= norm(A_j) * abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type. Array of batch_count scalars on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of A_j (i.e. off(A_j)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j. If info[j] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEEVJ_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\ncomplex Hermitian matrices A_l.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A_l\\f$ is transformed by a product of Jacobi rotations \\f$V_l\\f$ as\n\n\\f[\nA_l^{(k)} = V_l' A_l^{(k-1)} V_l^{}\n\\f]\n\nsuch that \\f$off(A_l^{(k)}) < off(A_l^{(k-1)})\\f$, where \\f$A_l^{(0)} = A_l\\f$ and \\f$off(A_l^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A_l^{(k)}\\f$. As \\f$off(A_l^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A_l^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A_l\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the Hermitian matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. 
On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are unchanged.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nabstol      real type.\nThe absolute tolerance. The algorithm is considered to have converged once off(A_l)\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type. Array of batch_count scalars on the GPU.\nThe Frobenius norm of the off-diagonal elements of A_l (i.e. off(A_l)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l. If info[l] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_cheevj_batched(
         handle: rocblas_handle,
         esort: rocblas_esort,
@@ -8525,7 +8948,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEVJ_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_j.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A_j\\f$ is transformed by a product of Jacobi rotations \\f$V_j\\f$ as\n\n\\f[\nA_j^{(k)} = V_j' A_j^{(k-1)} V_j\n\\f]\n\nsuch that \\f$off(A_j^{(k)}) < off(A_j^{(k-1)})\\f$, where \\f$A_j^{(0)} = A_j\\f$ and \\f$off(A_j^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A_j^{(k)}\\f$. As \\f$off(A_j^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A_j^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A_j\\f$.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\\n\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are unchanged.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(A_j)\nis <= norm(A_j) * abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type. Array of batch_count scalars on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of A_j (i.e. off(A_j)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j. If info[j] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYEVJ_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\nreal symmetric matrices A_l.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A_l\\f$ is transformed by a product of Jacobi rotations \\f$V_l\\f$ as\n\n\\f[\nA_l^{(k)} = V_l' A_l^{(k-1)} V_l^{}\n\\f]\n\nsuch that \\f$off(A_l^{(k)}) < off(A_l^{(k-1)})\\f$, where \\f$A_l^{(0)} = A_l\\f$ and \\f$off(A_l^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A_l^{(k)}\\f$. As \\f$off(A_l^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A_l^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A_l\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. 
On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are unchanged.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nabstol      type.\nThe absolute tolerance. The algorithm is considered to have converged once off(A_l)\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type. Array of batch_count scalars on the GPU.\nThe Frobenius norm of the off-diagonal elements of A_l (i.e. off(A_l)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l. If info[l] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssyevj_strided_batched(
         handle: rocblas_handle,
         esort: rocblas_esort,
@@ -8568,7 +8991,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEVJ_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\ncomplex Hermitian matrices A_j.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A_j\\f$ is transformed by a product of Jacobi rotations \\f$V_j\\f$ as\n\n\\f[\nA_j^{(k)} = V_j' A_j^{(k-1)} V_j\n\\f]\n\nsuch that \\f$off(A_j^{(k)}) < off(A_j^{(k-1)})\\f$, where \\f$A_j^{(0)} = A_j\\f$ and \\f$off(A_j^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A_j^{(k)}\\f$. As \\f$off(A_j^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A_j^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A_j\\f$.\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\\n\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the Hermitian matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the eigenvectors of A_j if they were computed and\nthe algorithm converged; otherwise the contents of A_j are unchanged.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(A_j)\nis <= norm(A_j) * abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type. Array of batch_count scalars on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of A_j (i.e. off(A_j)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\\n\nThe eigenvalues of A_j in increasing order.\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j. If info[j] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEEVJ_STRIDED_BATCHED computes the eigenvalues and optionally the eigenvectors of a batch of\ncomplex Hermitian matrices A_l.\n\n\\details\nThe eigenvalues are found using the iterative Jacobi algorithm and are returned in an order\ndepending on the value of esort.\nThe eigenvectors are computed depending on the value of evect. The computed eigenvectors are orthonormal.\n\nAt the \\f$k\\f$-th iteration (or \"sweep\"), \\f$A_l\\f$ is transformed by a product of Jacobi rotations \\f$V_l\\f$ as\n\n\\f[\nA_l^{(k)} = V_l' A_l^{(k-1)} V_l^{}\n\\f]\n\nsuch that \\f$off(A_l^{(k)}) < off(A_l^{(k-1)})\\f$, where \\f$A_l^{(0)} = A_l\\f$ and \\f$off(A_l^{(k)})\\f$ is the\nFrobenius norm of the off-diagonal elements of \\f$A_l^{(k)}\\f$. As \\f$off(A_l^{(k)}) \\rightarrow 0\\f$, the\ndiagonal elements of \\f$A_l^{(k)}\\f$ increasingly resemble the eigenvalues of \\f$A_l\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nesort       #rocblas_esort.\nSpecifies the order of the returned eigenvalues. If esort is\nrocblas_esort_ascending, then the eigenvalues are sorted and returned in ascending order.\nIf esort is rocblas_esort_none, then the order of the returned eigenvalues is unspecified.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the Hermitian matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. 
On exit, the eigenvectors of A_l if they were computed and\nthe algorithm converged; otherwise the contents of A_l are unchanged.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nabstol      real type.\nThe absolute tolerance. The algorithm is considered to have converged once off(A_l)\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type. Array of batch_count scalars on the GPU.\nThe Frobenius norm of the off-diagonal elements of A_l (i.e. off(A_l)) at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\nThe eigenvalues of A_l in increasing order.\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l. If info[l] = 1, the algorithm did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_cheevj_strided_batched(
         handle: rocblas_handle,
         esort: rocblas_esort,
@@ -8611,7 +9034,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEVX computes a set of the eigenvalues and optionally the corresponding eigenvectors of a\nreal symmetric matrix A.\n\n\\details\nThis function computes all the eigenvalues of A, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[in]\nvl          type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. 
Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to a rocblas_int on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev = n.\nIf erange is rocblas_erange_index, nev = iu - il + 1. Otherwise, 0 <= nev <= n.\n@param[out]\nW           pointer to type. Array on the GPU of dimension n.\\n\nThe first nev elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[out]\nZ           pointer to type. Array on the GPU of dimension ldz*nev.\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev columns contain\nthe eigenvectors of A corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_range_value, then the values of nev are not known in advance.\nThe user should ensure that Z is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrix Z.\n@param[out]\nifail       pointer to rocblas_int. 
Array on the GPU of dimension n.\\n\nIf info = 0, the first nev elements of ifail are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, the algorithm did not converge. i columns of Z did not converge."]
+    #[doc = " @{\n\\brief SYEVX computes a set of the eigenvalues and optionally the corresponding eigenvectors of a\nreal symmetric matrix A.\n\n\\details\nThis function computes all the eigenvalues of A, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrix A.\n@param[in]\nvl          type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. 
Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to a rocblas_int on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev = n.\nIf erange is rocblas_erange_index, nev = iu - il + 1. Otherwise, 0 <= nev <= n.\n@param[out]\nW           pointer to type. Array on the GPU of dimension n.\nThe first nev elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[out]\nZ           pointer to type. Array on the GPU of dimension ldz*nev.\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev columns contain\nthe eigenvectors of A corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_erange_value, then the values of nev are not known in advance.\nThe user should ensure that Z is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrix Z.\n@param[out]\nifail       pointer to rocblas_int. 
Array on the GPU of dimension n.\nIf info = 0, the first nev elements of ifail are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, the algorithm did not converge. i columns of Z did not converge."]
     pub fn rocsolver_ssyevx(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8658,7 +9081,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEVX computes a set of the eigenvalues and optionally the corresponding eigenvectors of a\nHermitian matrix A.\n\n\\details\nThis function computes all the eigenvalues of A, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[in]\nvl          real type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. 
Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to a rocblas_int on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev = n.\nIf erange is rocblas_erange_index, nev = iu - il + 1. Otherwise, 0 <= nev <= n.\n@param[out]\nW           pointer to real type. Array on the GPU of dimension n.\\n\nThe first nev elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[out]\nZ           pointer to type. Array on the GPU of dimension ldz*nev.\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev columns contain\nthe eigenvectors of A corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_range_value, then the values of nev are not known in advance.\nThe user should ensure that Z is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrix Z.\n@param[out]\nifail       pointer to rocblas_int. 
Array on the GPU of dimension n.\\n\nIf info = 0, the first nev elements of ifail are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, the algorithm did not converge. i columns of Z did not converge."]
+    #[doc = " @{\n\\brief HEEVX computes a set of the eigenvalues and optionally the corresponding eigenvectors of a\nHermitian matrix A.\n\n\\details\nThis function computes all the eigenvalues of A, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrix A.\n@param[in]\nvl          real type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. 
Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to a rocblas_int on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev = n.\nIf erange is rocblas_erange_index, nev = iu - il + 1. Otherwise, 0 <= nev <= n.\n@param[out]\nW           pointer to real type. Array on the GPU of dimension n.\nThe first nev elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[out]\nZ           pointer to type. Array on the GPU of dimension ldz*nev.\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev columns contain\nthe eigenvectors of A corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_erange_value, then the values of nev are not known in advance.\nThe user should ensure that Z is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrix Z.\n@param[out]\nifail       pointer to rocblas_int. 
Array on the GPU of dimension n.\nIf info = 0, the first nev elements of ifail are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, the algorithm did not converge. i columns of Z did not converge."]
     pub fn rocsolver_cheevx(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8705,7 +9128,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEVX_BATCHED computes a set of the eigenvalues and optionally the corresponding eigenvectors\nof a batch of real symmetric matrices A_j.\n\n\\details\nThis function computes all the eigenvalues of A_j, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nvl          type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. 
Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev_j = n.\nIf erange is rocblas_erange_index, nev_j = iu - il + 1. Otherwise, 0 <= nev_j <= n.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\\n\nThe first nev_j elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldz*nev_j.\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev_j columns contain\nthe eigenvectors of A_j corresponding to the output eigenvalues. 
Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_range_value, then the values of nev_j are not known in advance.\nThe user should ensure that Z_j is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrices Z_j.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\\n\nIf info[j] = 0, the first nev_j elements of ifail_j are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\\n\nStride from the start of one vector ifail_j to the next one ifail_(j+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j.\nIf info[j] = i > 0, the algorithm did not converge. i columns of Z_j did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYEVX_BATCHED computes a set of the eigenvalues and optionally the corresponding eigenvectors\nof a batch of real symmetric matrices A_l.\n\n\\details\nThis function computes all the eigenvalues of A_l, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nvl          type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. 
Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A_l will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev[l] = n.\nIf erange is rocblas_erange_index, nev[l] = iu - il + 1. Otherwise, 0 <= nev[l] <= n.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\nThe first nev[l] elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldz*nev[l].\nOn exit, if evect is not rocblas_evect_none and info[l] = 0, the first nev[l] columns contain\nthe eigenvectors of A_l corresponding to the output eigenvalues. 
Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_erange_value, then the values of nev[l] are not known in advance.\nThe user should ensure that Z_l is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrices Z_l.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\nIf info[l] = 0, the first nev[l] elements of ifail_l are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\nStride from the start of one vector ifail_l to the next one ifail_(l+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l.\nIf info[l] = i > 0, the algorithm did not converge. i columns of Z_l did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssyevx_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8758,7 +9181,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEVX_BATCHED computes a set of the eigenvalues and optionally the corresponding eigenvectors\nof a batch of Hermitian matrices A_j.\n\n\\details\nThis function computes all the eigenvalues of A_j, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nvl          real type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. 
il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev_j = n.\nIf erange is rocblas_erange_index, nev_j = iu - il + 1. Otherwise, 0 <= nev_j <= n.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\\n\nThe first nev_j elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldz*nev_j.\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev_j columns contain\nthe eigenvectors of A_j corresponding to the output eigenvalues. 
Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_range_value, then the values of nev_j are not known in advance.\nThe user should ensure that Z_j is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrices Z_j.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\\n\nIf info[j] = 0, the first nev_j elements of ifail_j are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\\n\nStride from the start of one vector ifail_j to the next one ifail_(j+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j.\nIf info[j] = i > 0, the algorithm did not converge. i columns of Z_j did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEEVX_BATCHED computes a set of the eigenvalues and optionally the corresponding eigenvectors\nof a batch of Hermitian matrices A_l.\n\n\\details\nThis function computes all the eigenvalues of A_l, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nvl          real type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. 
Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A_l will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev[l] = n.\nIf erange is rocblas_erange_index, nev[l] = iu - il + 1. Otherwise, 0 <= nev[l] <= n.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\nThe first nev[l] elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldz*nev[l].\nOn exit, if evect is not rocblas_evect_none and info[l] = 0, the first nev[l] columns contain\nthe eigenvectors of A_l corresponding to the output eigenvalues. 
Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_erange_value, then the values of nev[l] are not known in advance.\nThe user should ensure that Z_l is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrices Z_l.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\nIf info[l] = 0, the first nev[l] elements of ifail_l are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\nStride from the start of one vector ifail_l to the next one ifail_(l+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l.\nIf info[l] = i > 0, the algorithm did not converge. i columns of Z_l did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_cheevx_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8811,7 +9234,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYEVX_STRIDED_BATCHED computes a set of the eigenvalues and optionally the corresponding eigenvectors\nof a batch of real symmetric matrices A_j.\n\n\\details\nThis function computes all the eigenvalues of A_j, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nvl          type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. 
Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev_j = n.\nIf erange is rocblas_erange_index, nev_j = iu - il + 1. Otherwise, 0 <= nev_j <= n.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\\n\nThe first nev_j elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           pointer to type. Array on the GPU (the size depends on the value of strideZ).\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev_j columns contain\nthe eigenvectors of A_j corresponding to the output eigenvalues. 
Not referenced if\nevect is rocblas_evect_none.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrices Z_j.\n@param[in]\nstrideZ     rocblas_stride.\\n\nStride from the start of one matrix Z_j to the next one Z_(j+1).\nThere is no restriction for the value of strideZ. Normal use case is strideZ >= ldz*nev_j.\nNote: If erange is rocblas_range_value, then the values of nev_j are not known in advance.\nThe user should ensure that Z_j is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\\n\nIf info[j] = 0, the first nev_j elements of ifail_j are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\\n\nStride from the start of one vector ifail_j to the next one ifail_(j+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j.\nIf info[j] = i > 0, the algorithm did not converge. i columns of Z_j did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYEVX_STRIDED_BATCHED computes a set of the eigenvalues and optionally the corresponding eigenvectors\nof a batch of real symmetric matrices A_l.\n\n\\details\nThis function computes all the eigenvalues of A_l, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nvl          type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\nThe upper bound of the search interval (vl, vu]. 
Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A_l will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev[l] = n.\nIf erange is rocblas_erange_index, nev[l] = iu - il + 1. Otherwise, 0 <= nev[l] <= n.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\nThe first nev[l] elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           pointer to type. Array on the GPU (the size depends on the value of strideZ).\nOn exit, if evect is not rocblas_evect_none and info[l] = 0, the first nev[l] columns contain\nthe eigenvectors of A_l corresponding to the output eigenvalues. 
Not referenced if\nevect is rocblas_evect_none.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrices Z_l.\n@param[in]\nstrideZ     rocblas_stride.\nStride from the start of one matrix Z_l to the next one Z_(l+1).\nThere is no restriction for the value of strideZ. Normal use case is strideZ >= ldz*nev[l].\nNote: If erange is rocblas_range_value, then the values of nev[l] are not known in advance.\nThe user should ensure that Z_l is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\nIf info[l] = 0, the first nev[l] elements of ifail_l are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\nStride from the start of one vector ifail_l to the next one ifail_(l+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l.\nIf info[l] = i > 0, the algorithm did not converge. i columns of Z_l did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssyevx_strided_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8868,7 +9291,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEEVX_STRIDED_BATCHED computes a set of the eigenvalues and optionally the corresponding eigenvectors\nof a batch of Hermitian matrices A_j.\n\n\\details\nThis function computes all the eigenvalues of A_j, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the symmetric matrices A_j is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_j\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nNumber of rows and columns of matrices A_j.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nvl          real type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. 
Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev_j = n.\nIf erange is rocblas_erange_index, nev_j = iu - il + 1. Otherwise, 0 <= nev_j <= n.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\\n\nThe first nev_j elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           pointer to type. Array on the GPU (the size depends on the value of strideZ).\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev_j columns contain\nthe eigenvectors of A_j corresponding to the output eigenvalues. 
Not referenced if\nevect is rocblas_evect_none.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrices Z_j.\n@param[in]\nstrideZ     rocblas_stride.\\n\nStride from the start of one matrix Z_j to the next one Z_(j+1).\nThere is no restriction for the value of strideZ. Normal use case is strideZ >= ldz*nev_j.\nNote: If erange is rocblas_range_value, then the values of nev_j are not known in advance.\nThe user should ensure that Z_j is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\\n\nIf info[j] = 0, the first nev_j elements of ifail_j are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\\n\nStride from the start of one vector ifail_j to the next one ifail_(j+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for matrix A_j.\nIf info[j] = i > 0, the algorithm did not converge. i columns of Z_j did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEEVX_STRIDED_BATCHED computes a set of the eigenvalues and optionally the corresponding eigenvectors\nof a batch of Hermitian matrices A_l.\n\n\\details\nThis function computes all the eigenvalues of A_l, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the symmetric matrices A_l is stored.\nIf uplo indicates lower (or upper), then the upper (or lower) part of A_l\nis not used.\n@param[in]\nn           rocblas_int. n >= 0.\nNumber of rows and columns of matrices A_l.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[in]\nvl          real type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\nThe upper bound of the search interval (vl, vu]. 
Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A_l will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev[l] = n.\nIf erange is rocblas_erange_index, nev[l] = iu - il + 1. Otherwise, 0 <= nev[l] <= n.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\nThe first nev[l] elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           pointer to type. Array on the GPU (the size depends on the value of strideZ).\nOn exit, if evect is not rocblas_evect_none and info[l] = 0, the first nev[l] columns contain\nthe eigenvectors of A_l corresponding to the output eigenvalues. 
Not referenced if\nevect is rocblas_evect_none.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrices Z_l.\n@param[in]\nstrideZ     rocblas_stride.\nStride from the start of one matrix Z_l to the next one Z_(l+1).\nThere is no restriction for the value of strideZ. Normal use case is strideZ >= ldz*nev[l].\nNote: If erange is rocblas_range_value, then the values of nev[l] are not known in advance.\nThe user should ensure that Z_l is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\nIf info[l] = 0, the first nev[l] elements of ifail_l are zero.\nOtherwise, contains the indices of those eigenvectors that failed\nto converge. Not referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\nStride from the start of one vector ifail_l to the next one ifail_(l+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for matrix A_l.\nIf info[l] = i > 0, the algorithm did not converge. i columns of Z_l did not converge.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_cheevx_strided_batched(
         handle: rocblas_handle,
         evect: rocblas_evect,
@@ -8925,7 +9348,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGV computes the eigenvalues and (optionally) eigenvectors of\na real generalized symmetric-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\\n\nOn entry, the symmetric positive definite matrix B. 
On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B.\n@param[out]\nD           pointer to type. Array on the GPU of dimension n.\\n\nOn exit, the eigenvalues in increasing order.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n.\\n\nThis array is used to work internally with the tridiagonal matrix T associated with\nthe reduced eigenvalue problem.\nOn exit, if 0 < info <= n, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
+    #[doc = " @{\n\\brief SYGV computes the eigenvalues and (optionally) eigenvectors of\na real generalized symmetric-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the symmetric matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\nOn entry, the symmetric positive definite matrix B. 
On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B.\n@param[out]\nD           pointer to type. Array on the GPU of dimension n.\nOn exit, the eigenvalues in increasing order.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n.\nThis array is used to work internally with the tridiagonal matrix T associated with\nthe reduced eigenvalue problem.\nOn exit, if 0 < info <= n, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
     pub fn rocsolver_ssygv(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -8960,7 +9383,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGV computes the eigenvalues and (optionally) eigenvectors of\na complex generalized hermitian-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the hermitian matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\\n\nOn entry, the hermitian positive definite matrix B. 
On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nOn exit, the eigenvalues in increasing order.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n.\\n\nThis array is used to work internally with the tridiagonal matrix T associated with\nthe reduced eigenvalue problem.\nOn exit, if 0 < info <= n, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
+    #[doc = " @{\n\\brief HEGV computes the eigenvalues and (optionally) eigenvectors of\na complex generalized hermitian-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the hermitian matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\nOn entry, the hermitian positive definite matrix B. 
On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension n.\nOn exit, the eigenvalues in increasing order.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n.\nThis array is used to work internally with the tridiagonal matrix T associated with\nthe reduced eigenvalue problem.\nOn exit, if 0 < info <= n, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
     pub fn rocsolver_chegv(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -8995,7 +9418,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGV_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^T B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^T B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nOn entry, the symmetric positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with\nthe jth reduced eigenvalue problem.\nOn exit, if 0 < info[j] <= n, E_j contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch instance j.\nIf info[j] = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGV_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^T B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^T B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the symmetric matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\nB           array of pointers to type. 
Each pointer points to an array on the GPU of dimension ldb*n.\nOn entry, the symmetric positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with\nthe l-th reduced eigenvalue problem.\nOn exit, if 0 < info[l] <= n, E_l contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch instance l.\nIf info[l] = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygv_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9036,7 +9459,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGV_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^H B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^H B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the hermitian matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nOn entry, the hermitian positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with\nthe jth reduced eigenvalue problem.\nOn exit, if 0 < info[j] <= n, it contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGV_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^H B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^H B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the hermitian matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nOn entry, the hermitian positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with\nthe l-th reduced eigenvalue problem.\nOn exit, if 0 < info[l] <= n, it contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch l.\nIf info[l] = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegv_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9077,7 +9500,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGV_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^T B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^T B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the symmetric matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the symmetric positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with\nthe jth reduced eigenvalue problem.\nOn exit, if 0 < info[j] <= n, it contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGV_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^T B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^T B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the symmetric matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nOn entry, the symmetric positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with\nthe l-th reduced eigenvalue problem.\nOn exit, if 0 < info[l] <= n, it contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch l.\nIf info[l] = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygv_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9122,7 +9545,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGV_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^H B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^H B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the hermitian matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the hermitian positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with\nthe jth reduced eigenvalue problem.\nOn exit, if 0 < info[j] <= n, it contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGV_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^H B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^H B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the hermitian matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nOn entry, the hermitian positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with\nthe l-th reduced eigenvalue problem.\nOn exit, if 0 < info[l] <= n, it contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch l.\nIf info[l] = i <= n, i off-diagonal elements of an intermediate\ntridiagonal form did not converge to zero.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegv_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9167,7 +9590,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGVD computes the eigenvalues and (optionally) eigenvectors of\na real generalized symmetric-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\\n\nOn entry, the symmetric positive definite matrix B. 
On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B.\n@param[out]\nD           pointer to type. Array on the GPU of dimension n.\\n\nOn exit, the eigenvalues in increasing order.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n.\\n\nThis array is used to work internally with the tridiagonal matrix T associated with\nthe reduced eigenvalue problem.\nOn exit, if 0 < info <= n, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
+    #[doc = " @{\n\\brief SYGVD computes the eigenvalues and (optionally) eigenvectors of\na real generalized symmetric-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the symmetric matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\nOn entry, the symmetric positive definite matrix B. 
On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B.\n@param[out]\nD           pointer to type. Array on the GPU of dimension n.\nOn exit, the eigenvalues in increasing order.\n@param[out]\nE           pointer to type. Array on the GPU of dimension n.\nThis array is used to work internally with the tridiagonal matrix T associated with\nthe reduced eigenvalue problem.\nOn exit, if 0 < info <= n, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
     pub fn rocsolver_ssygvd(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9202,7 +9625,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGVD computes the eigenvalues and (optionally) eigenvectors of\na complex generalized hermitian-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the hermitian matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\\n\nOn entry, the hermitian positive definite matrix B. 
On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension n.\\n\nOn exit, the eigenvalues in increasing order.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n.\\n\nThis array is used to work internally with the tridiagonal matrix T associated with\nthe reduced eigenvalue problem.\nOn exit, if 0 < info <= n, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
+    #[doc = " @{\n\\brief HEGVD computes the eigenvalues and (optionally) eigenvectors of\na complex generalized hermitian-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the hermitian matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\nOn entry, the hermitian positive definite matrix B. 
On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B.\n@param[out]\nD           pointer to real type. Array on the GPU of dimension n.\nOn exit, the eigenvalues in increasing order.\n@param[out]\nE           pointer to real type. Array on the GPU of dimension n.\nThis array is used to work internally with the tridiagonal matrix T associated with\nthe reduced eigenvalue problem.\nOn exit, if 0 < info <= n, it contains the unconverged off-diagonal elements of T\n(or properly speaking, a tridiagonal matrix equivalent to T). The diagonal elements\nof this matrix are in D; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
     pub fn rocsolver_chegvd(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9237,7 +9660,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGVD_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^T B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^T B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nOn entry, the symmetric positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with\nthe jth reduced eigenvalue problem.\nOn exit, if 0 < info[j] <= n, it contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info[j] = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGVD_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^T B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^T B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the symmetric matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nOn entry, the symmetric positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with\nthe l-th reduced eigenvalue problem.\nOn exit, if 0 < info[l] <= n, it contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch l.\nIf info[l] = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info[l] = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygvd_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9278,7 +9701,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGVD_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^H B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^H B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the hermitian matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nOn entry, the hermitian positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with\nthe jth reduced eigenvalue problem.\nOn exit, if 0 < info[j] <= n, it contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info[j] = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGVD_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^H B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^H B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the hermitian matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nOn entry, the hermitian positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with\nthe l-th reduced eigenvalue problem.\nOn exit, if 0 < info[l] <= n, it contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch l.\nIf info[l] = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info[l] = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegvd_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9319,7 +9742,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGVD_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^T B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^T B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the symmetric matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the symmetric positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with\nthe jth reduced eigenvalue problem.\nOn exit, if 0 < info[j] <= n, it contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info[j] = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGVD_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^T B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^T B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the symmetric matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nOn entry, the symmetric positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[out]\nD           pointer to type. Array on the GPU (the size depends on the value of strideD).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with\nthe l-th reduced eigenvalue problem.\nOn exit, if 0 < info[l] <= n, it contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch l.\nIf info[l] = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info[l] = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygvd_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9364,7 +9787,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGVD_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^H B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^H B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the hermitian matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the hermitian positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\\n\nStride from the start of one vector D_j to the next one D_(j+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\\n\nThis array is used to work internally with the tridiagonal matrix T_j associated with\nthe jth reduced eigenvalue problem.\nOn exit, if 0 < info[j] <= n, it contains the unconverged off-diagonal elements of T_j\n(or properly speaking, a tridiagonal matrix equivalent to T_j). The diagonal elements\nof this matrix are in D_j; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\\n\nStride from the start of one vector E_j to the next one E_(j+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info[j] = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGVD_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed using a divide-and-conquer algorithm, depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^H B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^H B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the hermitian matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nOn entry, the hermitian positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[out]\nD           pointer to real type. Array on the GPU (the size depends on the value of strideD).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideD     rocblas_stride.\nStride from the start of one vector D_l to the next one D_(l+1).\nThere is no restriction for the value of strideD. Normal use is strideD >= n.\n@param[out]\nE           pointer to real type. Array on the GPU (the size depends on the value of strideE).\nThis array is used to work internally with the tridiagonal matrix T_l associated with\nthe l-th reduced eigenvalue problem.\nOn exit, if 0 < info[l] <= n, it contains the unconverged off-diagonal elements of T_l\n(or properly speaking, a tridiagonal matrix equivalent to T_l). The diagonal elements\nof this matrix are in D_l; those that converged correspond to a subset of the\neigenvalues (not necessarily ordered).\n@param[in]\nstrideE     rocblas_stride.\nStride from the start of one vector E_l to the next one E_(l+1).\nThere is no restriction for the value of strideE. Normal use is strideE >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch l.\nIf info[l] = i <= n and evect is rocblas_evect_none, i off-diagonal elements of an\nintermediate tridiagonal form did not converge to zero.\nIf info[l] = i <= n and evect is rocblas_evect_original, the algorithm failed to\ncompute an eigenvalue in the submatrix from [i/(n+1), i/(n+1)] to [i%(n+1), i%(n+1)].\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegvd_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9409,7 +9832,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGVJ computes the eigenvalues and (optionally) eigenvectors of\na real generalized symmetric-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\\n\nOn entry, the symmetric positive definite matrix B. On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(T)\nis <= norm(T) * abstol, where T is the matrix obtained by reduction to standard form.\nIf abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of T (i.e. off(T)) at the final iteration,\nwhere T is the matrix obtained by reduction to standard form.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to a rocblas_int on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm.\n@param[out]\nW           pointer to type. Array on the GPU of dimension n.\\n\nOn exit, the eigenvalues in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = 1, the algorithm did not converge.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
+    #[doc = " @{\n\\brief SYGVJ computes the eigenvalues and (optionally) eigenvectors of\na real generalized symmetric-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the symmetric matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. 
Array on the GPU of dimension ldb*n.\nOn entry, the symmetric positive definite matrix B. On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B.\n@param[in]\nabstol      type.\nThe absolute tolerance. The algorithm is considered to have converged once the residual\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type on the GPU.\nThe Frobenius norm of the off-diagonal elements at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to a rocblas_int on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm.\n@param[out]\nW           pointer to type. Array on the GPU of dimension n.\nOn exit, the eigenvalues in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = 1, the algorithm did not converge.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
     pub fn rocsolver_ssygvj(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9450,7 +9873,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGVJ computes the eigenvalues and (optionally) eigenvectors of\na complex generalized hermitian-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the hermitian matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\\n\nOn entry, the hermitian positive definite matrix B. On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(T)\nis <= norm(T) * abstol, where T is the matrix obtained by reduction to standard form.\nIf abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of T (i.e. off(T)) at the final iteration,\nwhere T is the matrix obtained by reduction to standard form.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to a rocblas_int on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm.\n@param[out]\nW           pointer to real type. Array on the GPU of dimension n.\\n\nOn exit, the eigenvalues in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = 1, the algorithm did not converge.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
+    #[doc = " @{\n\\brief HEGVJ computes the eigenvalues and (optionally) eigenvectors of\na complex generalized hermitian-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the hermitian matrix A. On exit, if evect is original,\nthe normalized matrix Z of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrix A (including the diagonal) is destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nB           pointer to type. 
Array on the GPU of dimension ldb*n.\nOn entry, the hermitian positive definite matrix B. On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B.\n@param[in]\nabstol      real type.\nThe absolute tolerance. The algorithm is considered to have converged once the residual\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type on the GPU.\nThe Frobenius norm of the off-diagonal elements at the final iteration.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to a rocblas_int on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm.\n@param[out]\nW           pointer to real type. Array on the GPU of dimension n.\nOn exit, the eigenvalues in increasing order.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = 1, the algorithm did not converge.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
     pub fn rocsolver_chegvj(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9491,7 +9914,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGVJ_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^T B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^T B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. 
If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nOn entry, the symmetric positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(T_j)\nis <= norm(T_j) * abstol, where T_j is the matrix obtained by reduction to standard form.\nIf abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of T_j (i.e. off(T_j)) at the final iteration,\nwhere T is the matrix obtained by reduction to standard form.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch instance j.\nIf info[j] = 1, the algorithm did not converge.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGVJ_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^T B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^T B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the symmetric matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nOn entry, the symmetric positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nabstol      type.\nThe absolute tolerance. The algorithm is considered to have converged once the residual\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type. Array of batch_count scalars on the GPU.\nThe Frobenius norm of the off-diagonal elements at the final iteration for each batch instance.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch instance l.\nIf info[l] = 1, the algorithm did not converge.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygvj_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9536,7 +9959,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGVJ_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^H B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^H B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the hermitian matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. 
If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nOn entry, the hermitian positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(T_j)\nis <= norm(T_j) * abstol, where T_j is the matrix obtained by reduction to standard form.\nIf abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of T_j (i.e. off(T_j)) at the final iteration,\nwhere T is the matrix obtained by reduction to standard form.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = 1, the algorithm did not converge.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGVJ_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^H B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^H B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the hermitian matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[out]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nOn entry, the hermitian positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nabstol      real type.\nThe absolute tolerance. The algorithm is considered to have converged once the residual\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type. Array of batch_count scalars on the GPU.\nThe Frobenius norm of the off-diagonal elements at the final iteration for each batch instance.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch instance l.\nIf info[l] = 1, the algorithm did not converge.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegvj_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9581,7 +10004,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGVJ_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^T B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^T B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the symmetric matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. 
If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the symmetric positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(T_j)\nis <= norm(T_j) * abstol, where T_j is the matrix obtained by reduction to standard form.\nIf abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of T_j (i.e. off(T_j)) at the final iteration,\nwhere T is the matrix obtained by reduction to standard form.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to type. 
Array on the GPU (the size depends on the value of strideW).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = 1, the algorithm did not converge.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGVJ_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^T B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^T B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the symmetric matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nOn entry, the symmetric positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[in]\nabstol      type.\nThe absolute tolerance. The algorithm is considered to have converged once the residual\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type. Array of batch_count scalars on the GPU.\nThe Frobenius norm of the off-diagonal elements at the final iteration for each batch instance.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch instance l.\nIf info[l] = 1, the algorithm did not converge.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygvj_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9630,7 +10053,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGVJ_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^H B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^H B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n\\note\nIn order to carry out calculations, this method may synchronize the stream contained within the\nrocblas_handle.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the hermitian matrices A_j. On exit, if evect is original,\nthe normalized matrix Z_j of eigenvectors. 
If evect is none, then the upper or lower triangular\npart of the matrices A_j (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the hermitian positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. The algorithm is considered to have converged once off(T_j)\nis <= norm(T_j) * abstol, where T_j is the matrix obtained by reduction to standard form.\nIf abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to type on the GPU.\\n\nThe Frobenius norm of the off-diagonal elements of T_j (i.e. off(T_j)) at the final iteration,\nwhere T is the matrix obtained by reduction to standard form.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\\n\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to real type. 
Array on the GPU (the size depends on the value of strideW).\\n\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = 1, the algorithm did not converge.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGVJ_STRIDED_BATCHED computes the eigenvalues and (optionally)\neigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvalues are found using the iterative\nJacobi algorithm, and are returned in ascending order. The eigenvectors are computed\ndepending on the value of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^H B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^H B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the hermitian matrices A_l. On exit, if evect is original,\nthe normalized matrix Z_l of eigenvectors. If evect is none, then the upper or lower triangular\npart of the matrices A_l (including the diagonal) are destroyed,\ndepending on the value of uplo.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nOn entry, the hermitian positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[in]\nabstol      real type.\nThe absolute tolerance. The algorithm is considered to have converged once the residual\nis <= abstol. If abstol <= 0, then the tolerance will be set to machine precision.\n@param[out]\nresidual    pointer to real type. Array of batch_count scalars on the GPU.\nThe Frobenius norm of the off-diagonal elements at the final iteration for each batch instance.\n@param[in]\nmax_sweeps  rocblas_int. max_sweeps > 0.\nMaximum number of sweeps (iterations) to be used by the algorithm.\n@param[out]\nn_sweeps    pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe actual number of sweeps (iterations) used by the algorithm for each batch instance.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\nOn exit, the eigenvalues in increasing order.\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use is strideW >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch instance l.\nIf info[l] = 1, the algorithm did not converge.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegvj_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9679,7 +10102,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGVX computes a set of the eigenvalues and optionally the corresponding eigenvectors of\na real generalized symmetric-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues of A, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. On exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. 
lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\\n\nOn entry, the symmetric positive definite matrix B. On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B.\n@param[in]\nvl          type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to a rocblas_int on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev = n.\nIf erange is rocblas_erange_index, nev = iu - il + 1. Otherwise, 0 <= nev <= n.\n@param[out]\nW           pointer to type. 
Array on the GPU of dimension n.\\n\nThe first nev elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[out]\nZ           pointer to type. Array on the GPU of dimension ldz*nev.\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev columns contain\nthe eigenvectors of A corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_range_value, then the values of nev are not known in advance.\nThe user should ensure that Z is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrix Z.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU of dimension n.\\n\nIf info = 0, the first nev elements of ifail are zero.\nIf info = i <= n, ifail contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i <= n, i columns of Z did not converge.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
+    #[doc = " @{\n\\brief SYGVX computes a set of the eigenvalues and optionally the corresponding eigenvectors of\na real generalized symmetric-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^T B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^T B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of matrix A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\nOn entry, the symmetric positive definite matrix B. On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B.\n@param[in]\nvl          type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to a rocblas_int on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev = n.\nIf erange is rocblas_erange_index, nev = iu - il + 1. Otherwise, 0 <= nev <= n.\n@param[out]\nW           pointer to type. 
Array on the GPU of dimension n.\nThe first nev elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[out]\nZ           pointer to type. Array on the GPU of dimension ldz*nev.\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev columns contain\nthe eigenvectors of A corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_erange_value, then the values of nev are not known in advance.\nThe user should ensure that Z is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrix Z.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU of dimension n.\nIf info = 0, the first nev elements of ifail are zero.\nIf info = i <= n, ifail contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i <= n, i columns of Z did not converge.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
     pub fn rocsolver_ssygvx(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9732,7 +10155,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGVX computes a set of the eigenvalues and optionally the corresponding eigenvectors of\na complex generalized hermitian-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues of A, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the matrix A. 
On exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrix A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\\n\nOn entry, the hermitian positive definite matrix B. On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B.\n@param[in]\nvl          real type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to a rocblas_int on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev = n.\nIf erange is rocblas_erange_index, nev = iu - il + 1. 
Otherwise, 0 <= nev <= n.\n@param[out]\nW           pointer to real type. Array on the GPU of dimension n.\\n\nThe first nev elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[out]\nZ           pointer to type. Array on the GPU of dimension ldz*nev.\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev columns contain\nthe eigenvectors of A corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_range_value, then the values of nev are not known in advance.\nThe user should ensure that Z is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrix Z.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU of dimension n.\\n\nIf info = 0, the first nev elements of ifail are zero.\nIf info = i <= n, ifail contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i <= n, i columns of Z did not converge.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
+    #[doc = " @{\n\\brief HEGVX computes a set of the eigenvalues and optionally the corresponding eigenvectors of\na complex generalized hermitian-definite eigenproblem.\n\n\\details\nThe problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA X = \\lambda B X & \\: \\text{1st form,}\\\\\nA B X = \\lambda X & \\: \\text{2nd form, or}\\\\\nB A X = \\lambda X & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix Z of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ^H B Z=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ^H B^{-1} Z=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblem.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA and B are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A and B are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the matrix A. On exit, the contents of A are destroyed.\n@param[in]\nlda         rocblas_int. 
lda >= n.\nSpecifies the leading dimension of matrix A.\n@param[out]\nB           pointer to type. Array on the GPU of dimension ldb*n.\nOn entry, the hermitian positive definite matrix B. On exit, the\ntriangular factor of B as returned by \\ref rocsolver_spotrf \"POTRF\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B.\n@param[in]\nvl          real type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to a rocblas_int on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev = n.\nIf erange is rocblas_erange_index, nev = iu - il + 1. Otherwise, 0 <= nev <= n.\n@param[out]\nW           pointer to real type. 
Array on the GPU of dimension n.\nThe first nev elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[out]\nZ           pointer to type. Array on the GPU of dimension ldz*nev.\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev columns contain\nthe eigenvectors of A corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_erange_value, then the values of nev are not known in advance.\nThe user should ensure that Z is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrix Z.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU of dimension n.\nIf info = 0, the first nev elements of ifail are zero.\nIf info = i <= n, ifail contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i <= n, i columns of Z did not converge.\nIf info = n + i, the leading minor of order i of B is not\npositive definite."]
     pub fn rocsolver_chegvx(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9785,7 +10208,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGVX_BATCHED computes a set of the eigenvalues and optionally\nthe corresponding eigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^T B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^T B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues of A, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nB           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nOn entry, the symmetric positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nvl          type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev_j = n.\nIf erange is rocblas_erange_index, nev_j = iu - il + 1. Otherwise, 0 <= nev_j <= n.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\\n\nThe first nev_j elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldz*nev_j.\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev_j columns contain\nthe eigenvectors of A_j corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_range_value, then the values of nev_j are not known in advance.\nThe user should ensure that Z_j is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrices Z_j.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\\n\nIf info[j] = 0, the first nev_j elements of ifail_j are zero.\nIf info[j] = i <= n, ifail_j contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\\n\nStride from the start of one vector ifail_j to the next one ifail_(j+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch instance j.\nIf info[j] = i <= n, i columns of Z did not converge.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGVX_BATCHED computes a set of the eigenvalues and optionally\nthe corresponding eigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^T B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^T B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nB           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nOn entry, the symmetric positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nvl          type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A_l will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev[l] = n.\nIf erange is rocblas_erange_index, nev[l] = iu - il + 1. Otherwise, 0 <= nev[l] <= n.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\nThe first nev[l] elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldz*nev[l].\nOn exit, if evect is not rocblas_evect_none and info[l] = 0, the first nev[l] columns contain\nthe eigenvectors of A_l corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_erange_value, then the values of nev[l] are not known in advance.\nThe user should ensure that Z_l is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrices Z_l.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\nIf info[l] = 0, the first nev[l] elements of ifail_l are zero.\nIf info[l] = i <= n, ifail_l contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\nStride from the start of one vector ifail_l to the next one ifail_(l+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch instance l.\nIf info[l] = i <= n, i columns of Z_l did not converge.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygvx_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9844,7 +10267,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGVX_BATCHED computes a set of the eigenvalues and optionally\nthe corresponding eigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^H B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^H B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues of A, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the matrices A_j. On exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nB           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\\n\nOn entry, the hermitian positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nvl          real type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev_j = n.\nIf erange is rocblas_erange_index, nev_j = iu - il + 1. Otherwise, 0 <= nev_j <= n.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\\n\nThe first nev_j elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldz*nev_j.\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev_j columns contain\nthe eigenvectors of A_j corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_range_value, then the values of nev_j are not known in advance.\nThe user should ensure that Z_j is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrices Z_j.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\\n\nIf info[j] = 0, the first nev_j elements of ifail_j are zero.\nIf info[j] = i <= n, ifail_j contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\\n\nStride from the start of one vector ifail_j to the next one ifail_(j+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch instance j.\nIf info[j] = i <= n, i columns of Z did not converge.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGVX_BATCHED computes a set of the eigenvalues and optionally\nthe corresponding eigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^H B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^H B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           Array of pointers to type. 
Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the matrices A_l. On exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nB           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*n.\nOn entry, the hermitian positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_batched \"POTRF_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nvl          real type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\nThe absolute tolerance. An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A_l will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev[l] = n.\nIf erange is rocblas_erange_index, nev[l] = iu - il + 1. Otherwise, 0 <= nev[l] <= n.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\nThe first nev[l] elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           Array of pointers to type. Each pointer points to an array on the GPU of dimension ldz*nev[l].\nOn exit, if evect is not rocblas_evect_none and info[l] = 0, the first nev[l] columns contain\nthe eigenvectors of A_l corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\nNote: If erange is rocblas_erange_value, then the values of nev[l] are not known in advance.\nThe user should ensure that Z_l is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrices Z_l.\n@param[out]\nifail       pointer to rocblas_int. Array on the GPU (the size depends on the value of strideF).\nIf info[l] = 0, the first nev[l] elements of ifail_l are zero.\nIf info[l] = i <= n, ifail_l contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\nStride from the start of one vector ifail_l to the next one ifail_(l+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. 
Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch instance l.\nIf info[l] = i <= n, i columns of Z_l did not converge.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegvx_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9903,7 +10326,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYGVX_STRIDED_BATCHED computes a set of the eigenvalues and optionally\nthe corresponding eigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^T B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^T B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues of A, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the symmetric positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[in]\nvl          type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\\n\nThe absolute tolerance. 
An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev_j = n.\nIf erange is rocblas_erange_index, nev_j = iu - il + 1. Otherwise, 0 <= nev_j <= n.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\\n\nThe first nev_j elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           pointer to type. Array on the GPU (the size depends on the value of strideZ).\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev_j columns contain\nthe eigenvectors of A_j corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrices Z_j.\n@param[in]\nstrideZ     rocblas_stride.\\n\nStride from the start of one matrix Z_j to the next one Z_(j+1).\nThere is no restriction for the value of strideZ. Normal use case is strideZ >= ldz*nev_j.\nNote: If erange is rocblas_range_value, then the values of nev_j are not known in advance.\nThe user should ensure that Z_j is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[out]\nifail       pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideF).\\n\nIf info[j] = 0, the first nev_j elements of ifail_j are zero.\nIf info[j] = i <= n, ifail_j contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\\n\nStride from the start of one vector ifail_j to the next one ifail_(j+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = i <= n, i columns of Z did not converge.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYGVX_STRIDED_BATCHED computes a set of the eigenvalues and optionally\nthe corresponding eigenvectors of a batch of real generalized symmetric-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^T B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^T B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nOn entry, the symmetric positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[in]\nvl          type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nvu          type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise.\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\nabstol      type.\nThe absolute tolerance. 
An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A_l will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev[l] = n.\nIf erange is rocblas_erange_index, nev[l] = iu - il + 1. Otherwise, 0 <= nev[l] <= n.\n@param[out]\nW           pointer to type. Array on the GPU (the size depends on the value of strideW).\nThe first nev[l] elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           pointer to type. Array on the GPU (the size depends on the value of strideZ).\nOn exit, if evect is not rocblas_evect_none and info[l] = 0, the first nev[l] columns contain\nthe eigenvectors of A_l corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrices Z_l.\n@param[in]\nstrideZ     rocblas_stride.\nStride from the start of one matrix Z_l to the next one Z_(l+1).\nThere is no restriction for the value of strideZ. Normal use case is strideZ >= ldz*nev[l].\nNote: If erange is rocblas_erange_value, then the values of nev[l] are not known in advance.\nThe user should ensure that Z_l is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[out]\nifail       pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideF).\nIf info[l] = 0, the first nev[l] elements of ifail_l are zero.\nIf info[l] = i <= n, ifail_l contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\nStride from the start of one vector ifail_l to the next one ifail_(l+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch instance l.\nIf info[l] = i <= n, i columns of Z_l did not converge.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssygvx_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -9968,7 +10391,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief HEGVX_STRIDED_BATCHED computes a set of the eigenvalues and optionally\nthe corresponding eigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_j X_j = \\lambda B_j X_j & \\: \\text{1st form,}\\\\\nA_j B_j X_j = \\lambda X_j & \\: \\text{2nd form, or}\\\\\nB_j A_j X_j = \\lambda X_j & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_j\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_j^H B_j Z_j=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_j^H B_j^{-1} Z_j=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues of A, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\\n\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\\n\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\\n\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower parts of the matrices\nA_j and B_j are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_j and B_j are not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the matrices A_j. On exit, the contents of A_j are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, the hermitian positive definite matrices B_j. On exit, the\ntriangular factor of B_j as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nSpecifies the leading dimension of B_j.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one matrix B_j to the next one B_(j+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[in]\nvl          real type. vl < vu.\\n\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\\n\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\\n\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\\n\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of T or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\\n\nThe absolute tolerance. 
An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of T will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev_j = n.\nIf erange is rocblas_erange_index, nev_j = iu - il + 1. Otherwise, 0 <= nev_j <= n.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\\n\nThe first nev_j elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\\n\nStride from the start of one vector W_j to the next one W_(j+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           pointer to type. Array on the GPU (the size depends on the value of strideZ).\\n\nOn exit, if evect is not rocblas_evect_none and info = 0, the first nev_j columns contain\nthe eigenvectors of A_j corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\n@param[in]\nldz         rocblas_int. ldz >= n.\\n\nSpecifies the leading dimension of matrices Z_j.\n@param[in]\nstrideZ     rocblas_stride.\\n\nStride from the start of one matrix Z_j to the next one Z_(j+1).\nThere is no restriction for the value of strideZ. Normal use case is strideZ >= ldz*nev_j.\nNote: If erange is rocblas_range_value, then the values of nev_j are not known in advance.\nThe user should ensure that Z_j is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[out]\nifail       pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideF).\\n\nIf info[j] = 0, the first nev_j elements of ifail_j are zero.\nIf info[j] = i <= n, ifail_j contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\\n\nStride from the start of one vector ifail_j to the next one ifail_(j+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit of batch j.\nIf info[j] = i <= n, i columns of Z did not converge.\nIf info[j] = n + i, the leading minor of order i of B_j is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief HEGVX_STRIDED_BATCHED computes a set of the eigenvalues and optionally\nthe corresponding eigenvectors of a batch of complex generalized hermitian-definite eigenproblems.\n\n\\details\nFor each instance in the batch, the problem solved by this function is either of the form\n\n\\f[\n\\begin{array}{cl}\nA_l X_l = \\lambda B_l X_l & \\: \\text{1st form,}\\\\\nA_l B_l X_l = \\lambda X_l & \\: \\text{2nd form, or}\\\\\nB_l A_l X_l = \\lambda X_l & \\: \\text{3rd form,}\n\\end{array}\n\\f]\n\ndepending on the value of itype. The eigenvectors are computed depending on the\nvalue of evect.\n\nWhen computed, the matrix \\f$Z_l\\f$ of eigenvectors is normalized as follows:\n\n\\f[\n\\begin{array}{cl}\nZ_l^H B_l^{} Z_l^{}=I & \\: \\text{if 1st or 2nd form, or}\\\\\nZ_l^H B_l^{-1} Z_l^{}=I & \\: \\text{if 3rd form.}\n\\end{array}\n\\f]\n\nThis function computes all the eigenvalues, all the eigenvalues in the half-open interval \\f$(vl, vu]\\f$,\nor the il-th through iu-th eigenvalues, depending on the value of erange. If evect is rocblas_evect_original,\nthe eigenvectors for these eigenvalues will be computed as well.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nitype       #rocblas_eform.\nSpecifies the form of the generalized eigenproblems.\n@param[in]\nevect       #rocblas_evect.\nSpecifies whether the eigenvectors are to be computed.\nIf evect is rocblas_evect_original, then the eigenvectors are computed.\nrocblas_evect_tridiagonal is not supported.\n@param[in]\nerange      #rocblas_erange.\nSpecifies the type of range or interval of the eigenvalues to be computed.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower parts of the matrices\nA_l and B_l are stored. If uplo indicates lower (or upper),\nthen the upper (or lower) parts of A_l and B_l are not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe matrix dimensions.\n@param[inout]\nA           pointer to type. 
Array on the GPU (the size depends on the value of strideA).\nOn entry, the matrices A_l. On exit, the contents of A_l are destroyed.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n.\n@param[out]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nOn entry, the hermitian positive definite matrices B_l. On exit, the\ntriangular factor of B_l as returned by \\ref rocsolver_spotrf_strided_batched \"POTRF_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= n.\nSpecifies the leading dimension of B_l.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one matrix B_l to the next one B_(l+1).\nThere is no restriction for the value of strideB. Normal use is strideB >= ldb*n.\n@param[in]\nvl          real type. vl < vu.\nThe lower bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nvu          real type. vl < vu.\nThe upper bound of the search interval (vl, vu]. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues within a set of indices.\n@param[in]\nil          rocblas_int. il = 1 if n = 0; 1 <= il <= iu otherwise.\nThe index of the smallest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\niu          rocblas_int. iu = 0 if n = 0; 1 <= il <= iu otherwise..\nThe index of the largest eigenvalue to be computed. Ignored if range indicates to look\nfor all the eigenvalues of A_l or the eigenvalues in a half-open interval.\n@param[in]\nabstol      real type.\nThe absolute tolerance. 
An eigenvalue is considered to be located if it lies\nin an interval whose width is <= abstol. If abstol is negative, then machine-epsilon times\nthe 1-norm of the tridiagonal form of A_l will be used as tolerance. If abstol=0, then the tolerance will be set\nto twice the underflow threshold; this is the tolerance that could get the most accurate results.\n@param[out]\nnev         pointer to rocblas_int. Array of batch_count integers on the GPU.\nThe total number of eigenvalues found. If erange is rocblas_erange_all, nev[l] = n.\nIf erange is rocblas_erange_index, nev[l] = iu - il + 1. Otherwise, 0 <= nev[l] <= n.\n@param[out]\nW           pointer to real type. Array on the GPU (the size depends on the value of strideW).\nThe first nev[l] elements contain the computed eigenvalues. (The remaining elements\ncan be used as workspace for internal computations).\n@param[in]\nstrideW     rocblas_stride.\nStride from the start of one vector W_l to the next one W_(l+1).\nThere is no restriction for the value of strideW. Normal use case is strideW >= n.\n@param[out]\nZ           pointer to type. Array on the GPU (the size depends on the value of strideZ).\nOn exit, if evect is not rocblas_evect_none and info[l] = 0, the first nev[l] columns contain\nthe eigenvectors of A_l corresponding to the output eigenvalues. Not referenced if\nevect is rocblas_evect_none.\n@param[in]\nldz         rocblas_int. ldz >= n.\nSpecifies the leading dimension of matrices Z_l.\n@param[in]\nstrideZ     rocblas_stride.\nStride from the start of one matrix Z_l to the next one Z_(l+1).\nThere is no restriction for the value of strideZ. Normal use case is strideZ >= ldz*nev[l].\nNote: If erange is rocblas_range_value, then the values of nev[l] are not known in advance.\nThe user should ensure that Z_l is large enough to hold n columns, as all n columns\ncan be used as workspace for internal computations.\n@param[out]\nifail       pointer to rocblas_int. 
Array on the GPU (the size depends on the value of strideF).\nIf info[l] = 0, the first nev[l] elements of ifail_l are zero.\nIf info[l] = i <= n, ifail_l contains the indices of the i eigenvectors that failed\nto converge.\nNot referenced if evect is rocblas_evect_none.\n@param[in]\nstrideF     rocblas_stride.\nStride from the start of one vector ifail_l to the next one ifail_(l+1).\nThere is no restriction for the value of strideF. Normal use case is strideF >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit of batch l.\nIf info[l] = i <= n, i columns of Z_l did not converge.\nIf info[l] = n + i, the leading minor of order i of B_l is not\npositive definite.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_chegvx_strided_batched(
         handle: rocblas_handle,
         itype: rocblas_eform,
@@ -10033,7 +10456,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_OUTOFPLACE computes the inverse \\f$C = A^{-1}\\f$ of a general n-by-n matrix A.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nAC = I\n\\f]\n\nwhere I is the identity matrix, and A is factorized as \\f$A = PLU\\f$ as given by \\ref rocsolver_sgetrf \"GETRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nThe factors L and U of the factorization A = P*L*U returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe pivot indices returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[out]\nC           pointer to type. Array on the GPU of dimension ldc*n.\\n\nIf info = 0, the inverse of A. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\\n\nSpecifies the leading dimension of C.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
+    #[doc = " @{\n\\brief GETRI_OUTOFPLACE computes the inverse \\f$C = A^{-1}\\f$ of a general n-by-n matrix A.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nAC = I\n\\f]\n\nwhere I is the identity matrix, and A is factorized as \\f$A = PLU\\f$ as given by \\ref rocsolver_sgetrf \"GETRF\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*n.\nThe factors L and U of the factorization A = P*L*U returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe pivot indices returned by \\ref rocsolver_sgetrf \"GETRF\".\n@param[out]\nC           pointer to type. Array on the GPU of dimension ldc*n.\nIf info = 0, the inverse of A. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\nSpecifies the leading dimension of C.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
     pub fn rocsolver_sgetri_outofplace(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -10086,7 +10509,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_OUTOFPLACE_BATCHED computes the inverse \\f$C_j = A_j^{-1}\\f$ of a batch of general n-by-n matrices \\f$A_j\\f$.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA_j C_j = I\n\\f]\n\nwhere I is the identity matrix, and \\f$A_j\\f$ is factorized as \\f$A_j = P_j  L_j  U_j\\f$ as given by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[in]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nThe factors L_j and U_j of the factorization A_j = P_j*L_j*U_j returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\\n\nThe pivot indices returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(i+j).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\nC           array of pointers to type. Each pointer points to an array on the GPU of dimension ldc*n.\\n\nIf info[j] = 0, the inverse of matrices A_j. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\\n\nSpecifies the leading dimension of C_j.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETRI_OUTOFPLACE_BATCHED computes the inverse \\f$C_l = A_l^{-1}\\f$ of a batch of general n-by-n matrices \\f$A_l\\f$.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA_l C_l = I\n\\f]\n\nwhere I is the identity matrix, and \\f$A_l\\f$ is factorized as \\f$A_l = P_l  L_l  U_l\\f$ as given by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[in]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nThe factors L_l and U_l of the factorization A_l = P_l*L_l*U_l returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\nThe pivot indices returned by \\ref rocsolver_sgetrf_batched \"GETRF_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\nC           array of pointers to type. Each pointer points to an array on the GPU of dimension ldc*n.\nIf info[l] = 0, the inverse of matrices A_l. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\nSpecifies the leading dimension of C_l.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetri_outofplace_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -10147,7 +10570,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_OUTOFPLACE_STRIDED_BATCHED computes the inverse \\f$C_j = A_j^{-1}\\f$ of a batch of general n-by-n matrices \\f$A_j\\f$.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA_j C_j = I\n\\f]\n\nwhere I is the identity matrix, and \\f$A_j\\f$ is factorized as \\f$A_j = P_j  L_j  U_j\\f$ as given by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nThe factors L_j and U_j of the factorization A_j = P_j*L_j*U_j returned by\n\\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\\n\nThe pivot indices returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\nC           pointer to type. Array on the GPU (the size depends on the value of strideC).\\n\nIf info[j] = 0, the inverse of matrices A_j. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\\n\nSpecifies the leading dimension of C_j.\n@param[in]\nstrideC     rocblas_stride.\\n\nStride from the start of one matrix C_j to the next one C_(j+1).\nThere is no restriction for the value of strideC. 
Normal use case is strideC >= ldc*n\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETRI_OUTOFPLACE_STRIDED_BATCHED computes the inverse \\f$C_l = A_l^{-1}\\f$ of a batch of general n-by-n matrices \\f$A_l\\f$.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA_l C_l = I\n\\f]\n\nwhere I is the identity matrix, and \\f$A_l\\f$ is factorized as \\f$A_l = P_l L_l U_l\\f$ as given by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nThe factors L_l and U_l of the factorization A_l = P_l*L_l*U_l returned by\n\\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[in]\nipiv        pointer to rocblas_int. Array on the GPU (the size depends on the value of strideP).\nThe pivot indices returned by \\ref rocsolver_sgetrf_strided_batched \"GETRF_STRIDED_BATCHED\".\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\nC           pointer to type. Array on the GPU (the size depends on the value of strideC).\nIf info[l] = 0, the inverse of matrices A_l. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\nSpecifies the leading dimension of C_l.\n@param[in]\nstrideC     rocblas_stride.\nStride from the start of one matrix C_l to the next one C_(l+1).\nThere is no restriction for the value of strideC. 
Normal use case is strideC >= ldc*n\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetri_outofplace_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -10216,7 +10639,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_NPVT_OUTOFPLACE computes the inverse \\f$C = A^{-1}\\f$ of a general n-by-n matrix A without partial pivoting.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nAC = I\n\\f]\n\nwhere I is the identity matrix, and A is factorized as \\f$A = LU\\f$ as given by \\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nThe factors L and U of the factorization A = L*U returned by \\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nC           pointer to type. Array on the GPU of dimension ldc*n.\\n\nIf info = 0, the inverse of A. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\\n\nSpecifies the leading dimension of C.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
+    #[doc = " @{\n\\brief GETRI_NPVT_OUTOFPLACE computes the inverse \\f$C = A^{-1}\\f$ of a general n-by-n matrix A without partial pivoting.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nAC = I\n\\f]\n\nwhere I is the identity matrix, and A is factorized as \\f$A = LU\\f$ as given by \\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*n.\nThe factors L and U of the factorization A = L*U returned by \\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\".\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nC           pointer to type. Array on the GPU of dimension ldc*n.\nIf info = 0, the inverse of A. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\nSpecifies the leading dimension of C.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, U is singular. U[i,i] is the first zero pivot."]
     pub fn rocsolver_sgetri_npvt_outofplace(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -10265,7 +10688,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_NPVT_OUTOFPLACE_BATCHED computes the inverse \\f$C_j = A_j^{-1}\\f$ of a batch of general n-by-n matrices \\f$A_j\\f$\nwithout partial pivoting.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA_j C_j = I\n\\f]\n\nwhere I is the identity matrix, and \\f$A_j\\f$ is factorized as \\f$A_j = L_j  U_j\\f$ as given by \\ref rocsolver_sgetrf_npvt_batched \"GETRF_NPVT_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[in]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nThe factors L_j and U_j of the factorization A_j = L_j*U_j returned by \\ref rocsolver_sgetrf_npvt_batched \"GETRF_NPVT_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nC           array of pointers to type. Each pointer points to an array on the GPU of dimension ldc*n.\\n\nIf info[j] = 0, the inverse of matrices A_j. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\\n\nSpecifies the leading dimension of C_j.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETRI_NPVT_OUTOFPLACE_BATCHED computes the inverse \\f$C_l^{} = A_l^{-1}\\f$ of a batch of general n-by-n matrices \\f$A_l\\f$\nwithout partial pivoting.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA_l C_l = I\n\\f]\n\nwhere I is the identity matrix, and \\f$A_l\\f$ is factorized as \\f$A_l = L_l  U_l\\f$ as given by \\ref rocsolver_sgetrf_npvt_batched \"GETRF_NPVT_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[in]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nThe factors L_l and U_l of the factorization A_l = L_l*U_l returned by \\ref rocsolver_sgetrf_npvt_batched \"GETRF_NPVT_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nC           array of pointers to type. Each pointer points to an array on the GPU of dimension ldc*n.\nIf info[l] = 0, the inverse of matrices A_l. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\nSpecifies the leading dimension of C_l.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetri_npvt_outofplace_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -10318,7 +10741,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GETRI_NPVT_OUTOFPLACE_STRIDED_BATCHED computes the inverse \\f$C_j = A_j^{-1}\\f$ of a batch of general n-by-n matrices \\f$A_j\\f$\nwithout partial pivoting.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA_j C_j = I\n\\f]\n\nwhere I is the identity matrix, and \\f$A_j\\f$ is factorized as \\f$A_j = L_j  U_j\\f$ as given by \\ref rocsolver_sgetrf_npvt_strided_batched \"GETRF_NPVT_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nThe factors L_j and U_j of the factorization A_j = L_j*U_j returned by\n\\ref rocsolver_sgetrf_npvt_strided_batched \"GETRF_NPVT_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\nC           pointer to type. Array on the GPU (the size depends on the value of strideC).\\n\nIf info[j] = 0, the inverse of matrices A_j. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\\n\nSpecifies the leading dimension of C_j.\n@param[in]\nstrideC     rocblas_stride.\\n\nStride from the start of one matrix C_j to the next one C_(j+1).\nThere is no restriction for the value of strideC. Normal use case is strideC >= ldc*n\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, U_j is singular. U_j[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GETRI_NPVT_OUTOFPLACE_STRIDED_BATCHED computes the inverse \\f$C_l^{} = A_l^{-1}\\f$ of a batch of general n-by-n matrices \\f$A_l\\f$\nwithout partial pivoting.\n\n\\details\nThe inverse is computed by solving the linear system\n\n\\f[\nA_l C_l = I\n\\f]\n\nwhere I is the identity matrix, and \\f$A_l\\f$ is factorized as \\f$A_l = L_l  U_l\\f$ as given by \\ref rocsolver_sgetrf_npvt_strided_batched \"GETRF_NPVT_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nThe factors L_l and U_l of the factorization A_l = L_l*U_l returned by\n\\ref rocsolver_sgetrf_npvt_strided_batched \"GETRF_NPVT_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\nC           pointer to type. Array on the GPU (the size depends on the value of strideC).\nIf info[l] = 0, the inverse of matrices A_l. Otherwise, undefined.\n@param[in]\nldc         rocblas_int. ldc >= n.\nSpecifies the leading dimension of C_l.\n@param[in]\nstrideC     rocblas_stride.\nStride from the start of one matrix C_l to the next one C_(l+1).\nThere is no restriction for the value of strideC. Normal use case is strideC >= ldc*n\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, U_l is singular. U_l[i,i] is the first zero pivot.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgetri_npvt_outofplace_strided_batched(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -10379,7 +10802,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief TRTRI inverts a triangular n-by-n matrix A.\n\n\\details\nA can be upper or lower triangular, depending on the value of uplo, and unit or non-unit\ntriangular, depending on the value of diag.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\ndiag        rocblas_diagonal.\\n\nIf diag indicates unit, then the diagonal elements of A are not referenced and\nassumed to be one.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the triangular matrix.\nOn exit, the inverse of A if info = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, A is singular. A[i,i] is the first zero element in the diagonal."]
+    #[doc = " @{\n\\brief TRTRI inverts a triangular n-by-n matrix A.\n\n\\details\nA can be upper or lower triangular, depending on the value of uplo, and unit or non-unit\ntriangular, depending on the value of diag.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\ndiag        rocblas_diagonal.\nIf diag indicates unit, then the diagonal elements of A are not referenced and\nassumed to be one.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the triangular matrix.\nOn exit, the inverse of A if info = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, A is singular. A[i,i] is the first zero element in the diagonal."]
     pub fn rocsolver_strtri(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -10428,7 +10851,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief TRTRI_BATCHED inverts a batch of triangular n-by-n matrices \\f$A_j\\f$.\n\n\\details\n\\f$A_j\\f$ can be upper or lower triangular, depending on the value of uplo, and unit or non-unit\ntriangular, depending on the value of diag.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_j is not used.\n@param[in]\ndiag        rocblas_diagonal.\\n\nIf diag indicates unit, then the diagonal elements of matrices A_j are not referenced and\nassumed to be one.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the triangular matrices A_j.\nOn exit, the inverses of A_j if info[j] = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, A_j is singular. A_j[i,i] is the first zero element in the diagonal.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief TRTRI_BATCHED inverts a batch of triangular n-by-n matrices \\f$A_l\\f$.\n\n\\details\n\\f$A_l\\f$ can be upper or lower triangular, depending on the value of uplo, and unit or non-unit\ntriangular, depending on the value of diag.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\ndiag        rocblas_diagonal.\nIf diag indicates unit, then the diagonal elements of matrices A_l are not referenced and\nassumed to be one.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the triangular matrices A_l.\nOn exit, the inverses of A_l if info[l] = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, A_l is singular. A_l[i,i] is the first zero element in the diagonal.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_strtri_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -10481,7 +10904,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief TRTRI_STRIDED_BATCHED inverts a batch of triangular n-by-n matrices \\f$A_j\\f$.\n\n\\details\n\\f$A_j\\f$ can be upper or lower triangular, depending on the value of uplo, and unit or non-unit\ntriangular, depending on the value of diag.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_j is not used.\n@param[in]\ndiag        rocblas_diagonal.\\n\nIf diag indicates unit, then the diagonal elements of matrices A_j are not referenced and\nassumed to be one.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the triangular matrices A_j.\nOn exit, the inverses of A_j if info[j] = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for inversion of A_j.\nIf info[j] = i > 0, A_j is singular. A_j[i,i] is the first zero element in the diagonal.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief TRTRI_STRIDED_BATCHED inverts a batch of triangular n-by-n matrices \\f$A_l\\f$.\n\n\\details\n\\f$A_l\\f$ can be upper or lower triangular, depending on the value of uplo, and unit or non-unit\ntriangular, depending on the value of diag.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\ndiag        rocblas_diagonal.\nIf diag indicates unit, then the diagonal elements of matrices A_l are not referenced and\nassumed to be one.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the triangular matrices A_l.\nOn exit, the inverses of A_l if info[l] = 0.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. Normal use case is strideA >= lda*n\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for inversion of A_l.\nIf info[l] = i > 0, A_l is singular. A_l[i,i] is the first zero element in the diagonal.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_strtri_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -10538,7 +10961,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTF2 computes the factorization of a symmetric indefinite matrix \\f$A\\f$\nusing Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA = U D U^T & \\: \\text{or}\\\\\nA = L D L^T &\n\\end{array}\n\\f]\n\nwhere \\f$U\\f$ or \\f$L\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D(k)\\f$.\n\nSpecifically, \\f$U\\f$ and \\f$L\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU = P(n) U(n) \\cdots P(k) U(k) \\cdots & \\: \\text{and}\\\\\nL = P(1) L(1) \\cdots P(k) L(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D(k)\\f$, and \\f$P(k)\\f$ is a permutation matrix defined by\n\\f$ipiv[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D(k)\\f$, then \\f$U(k)\\f$\nand \\f$L(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D(k)\\f$ is stored in \\f$A[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D(k)\\f$ is stored in \\f$A[k-1,k-1]\\f$, \\f$A[k-1,k]\\f$,\nand \\f$A[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D(k)\\f$ is stored in \\f$A[k,k]\\f$, \\f$A[k+1,k]\\f$,\nand \\f$A[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrix A to be factored.\nOn exit, the block diagonal matrix D and the multipliers needed to\ncompute U or L.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe vector of pivot indices. 
Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv[k] > 0 then rows and columns k and ipiv[k]\nwere interchanged and D[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv[k] = ipiv[k-1] < 0 and uplo is upper (or ipiv[k]\n= ipiv[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv[k] (or rows and columns k+1 and -ipiv[k]) were interchanged\nand D[k-1,k-1] to D[k,k] (or D[k,k] to D[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, D is singular. D[i,i] is the first diagonal zero."]
+    #[doc = " @{\n\\brief SYTF2 computes the factorization of a symmetric indefinite matrix \\f$A\\f$\nusing Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA = U D U^T & \\: \\text{or}\\\\\nA = L D L^T &\n\\end{array}\n\\f]\n\nwhere \\f$U\\f$ or \\f$L\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D_k\\f$.\n\nSpecifically, \\f$U\\f$ and \\f$L\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU = P(n) U(n) \\cdots P(k) U(k) \\cdots & \\: \\text{and}\\\\\nL = P(1) L(1) \\cdots P(k) L(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D_k\\f$, and \\f$P(k)\\f$ is a permutation matrix defined by\n\\f$ipiv[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D_k\\f$, then \\f$U(k)\\f$\nand \\f$L(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D_k\\f$ is stored in \\f$A[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D_k\\f$ is stored in \\f$A[k-1,k-1]\\f$, \\f$A[k-1,k]\\f$,\nand \\f$A[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D_k\\f$ is stored in \\f$A[k,k]\\f$, \\f$A[k+1,k]\\f$,\nand \\f$A[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the symmetric matrix A to be factored.\nOn exit, the block diagonal matrix D and the multipliers needed to\ncompute U or L.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe vector of pivot indices. 
Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv[k] > 0 then rows and columns k and ipiv[k]\nwere interchanged and D[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv[k] = ipiv[k-1] < 0 and uplo is upper (or ipiv[k]\n= ipiv[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv[k] (or rows and columns k+1 and -ipiv[k]) were interchanged\nand D[k-1,k-1] to D[k,k] (or D[k,k] to D[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, D is singular. D[i,i] is the first diagonal zero."]
     pub fn rocsolver_ssytf2(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -10587,7 +11010,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTF2_BATCHED computes the factorization of a batch of symmetric indefinite\nmatrices using Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA_j = U_j D_j U_j^T & \\: \\text{or}\\\\\nA_j = L_j D_j L_j^T &\n\\end{array}\n\\f]\n\nwhere \\f$U_j\\f$ or \\f$L_j\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D_j\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D_j(k)\\f$.\n\nSpecifically, \\f$U_j\\f$ and \\f$L_j\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU_j = P_j(n) U_j(n) \\cdots P_j(k) U_j(k) \\cdots & \\: \\text{and}\\\\\nL_j = P_j(1) L_j(1) \\cdots P_j(k) L_j(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D_j(k)\\f$, and \\f$P_j(k)\\f$ is a permutation matrix defined by\n\\f$ipiv_j[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D_j(k)\\f$, then \\f$U_j(k)\\f$\nand \\f$L_j(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU_j(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL_j(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A_j\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k-1,k-1]\\f$, \\f$A_j[k-1,k]\\f$,\nand \\f$A_j[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A_j\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k,k]\\f$, \\f$A_j[k+1,k]\\f$,\nand \\f$A_j[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A_j\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_j is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrices A_j to be factored.\nOn exit, the block diagonal matrices D_j and the multipliers needed to\ncompute U_j or L_j.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe vector of pivot indices. 
Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv_j[k] > 0 then rows and columns k and ipiv_j[k]\nwere interchanged and D_j[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv_j[k] = ipiv_j[k-1] < 0 and uplo is upper (or ipiv_j[k]\n= ipiv_j[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv_j[k] (or rows and columns k+1 and -ipiv_j[k]) were interchanged\nand D_j[k-1,k-1] to D_j[k,k] (or D_j[k,k] to D_j[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, D_j is singular. D_j[i,i] is the first diagonal zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYTF2_BATCHED computes the factorization of a batch of symmetric indefinite\nmatrices using Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA_l^{} = U_l^{} D_l^{} U_l^T & \\: \\text{or}\\\\\nA_l^{} = L_l^{} D_l^{} L_l^T &\n\\end{array}\n\\f]\n\nwhere \\f$U_l\\f$ or \\f$L_l\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D_l\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D_{kl}\\f$.\n\nSpecifically, \\f$U_l\\f$ and \\f$L_l\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU_l = P_l(n) U_l(n) \\cdots P_l(k) U_l(k) \\cdots & \\: \\text{and}\\\\\nL_l = P_l(1) L_l(1) \\cdots P_l(k) L_l(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D_{kl}\\f$, and \\f$P_l(k)\\f$ is a permutation matrix defined by\n\\f$ipiv_l[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D_{kl}\\f$, then \\f$U_l(k)\\f$\nand \\f$L_l(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU_l(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL_l(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A_l\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k-1,k-1]\\f$, \\f$A_l[k-1,k]\\f$,\nand \\f$A_l[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A_l\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k,k]\\f$, \\f$A_l[k+1,k]\\f$,\nand \\f$A_l[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A_l\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the symmetric matrices A_l to be factored.\nOn exit, the block diagonal matrices D_l and the multipliers needed to\ncompute U_l or L_l.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe vector of pivot indices. 
Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv_l[k] > 0 then rows and columns k and ipiv_l[k]\nwere interchanged and D_l[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv_l[k] = ipiv_l[k-1] < 0 and uplo is upper (or ipiv_l[k]\n= ipiv_l[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv_l[k] (or rows and columns k+1 and -ipiv_l[k]) were interchanged\nand D_l[k-1,k-1] to D_l[k,k] (or D_l[k,k] to D_l[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, D_l is singular. D_l[i,i] is the first diagonal zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssytf2_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -10644,7 +11067,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTF2_STRIDED_BATCHED computes the factorization of a batch of symmetric indefinite\nmatrices using Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA_j = U_j D_j U_j^T & \\: \\text{or}\\\\\nA_j = L_j D_j L_j^T &\n\\end{array}\n\\f]\n\nwhere \\f$U_j\\f$ or \\f$L_j\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D_j\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D_j(k)\\f$.\n\nSpecifically, \\f$U_j\\f$ and \\f$L_j\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU_j = P_j(n) U_j(n) \\cdots P_j(k) U_j(k) \\cdots & \\: \\text{and}\\\\\nL_j = P_j(1) L_j(1) \\cdots P_j(k) L_j(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D_j(k)\\f$, and \\f$P_j(k)\\f$ is a permutation matrix defined by\n\\f$ipiv_j[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D_j(k)\\f$, then \\f$U_j(k)\\f$\nand \\f$L_j(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU_j(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL_j(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A_j\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k-1,k-1]\\f$, \\f$A_j[k-1,k]\\f$,\nand \\f$A_j[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A_j\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k,k]\\f$, \\f$A_j[k+1,k]\\f$,\nand \\f$A_j[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A_j\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_j is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the symmetric matrices A_j to be factored.\nOn exit, the block diagonal matrices D_j and the multipliers needed to\ncompute U_j or L_j.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. 
Normal use case is strideA >= lda*n\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe vector of pivot indices. Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv_j[k] > 0 then rows and columns k and ipiv_j[k]\nwere interchanged and D_j[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv_j[k] = ipiv_j[k-1] < 0 and uplo is upper (or ipiv_j[k]\n= ipiv_j[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv_j[k] (or rows and columns k+1 and -ipiv_j[k]) were interchanged\nand D_j[k-1,k-1] to D_j[k,k] (or D_j[k,k] to D_j[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, D_j is singular. D_j[i,i] is the first diagonal zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYTF2_STRIDED_BATCHED computes the factorization of a batch of symmetric indefinite\nmatrices using Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the unblocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA_l^{} = U_l^{} D_l^{} U_l^T & \\: \\text{or}\\\\\nA_l^{} = L_l^{} D_l^{} L_l^T &\n\\end{array}\n\\f]\n\nwhere \\f$U_l\\f$ or \\f$L_l\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D_l\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D_{kl}\\f$.\n\nSpecifically, \\f$U_l\\f$ and \\f$L_l\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU_l = P_l(n) U_l(n) \\cdots P_l(k) U_l(k) \\cdots & \\: \\text{and}\\\\\nL_l = P_l(1) L_l(1) \\cdots P_l(k) L_l(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D_{kl}\\f$, and \\f$P_l(k)\\f$ is a permutation matrix defined by\n\\f$ipiv_l[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D_{kl}\\f$, then \\f$U_l(k)\\f$\nand \\f$L_l(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU_l(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL_l(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A_l\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k-1,k-1]\\f$, \\f$A_l[k-1,k]\\f$,\nand \\f$A_l[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A_l\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k,k]\\f$, \\f$A_l[k+1,k]\\f$,\nand \\f$A_l[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A_l\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the symmetric matrices A_l to be factored.\nOn exit, the block diagonal matrices D_l and the multipliers needed to\ncompute U_l or L_l.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. 
Normal use case is strideA >= lda*n\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe vector of pivot indices. Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv_l[k] > 0 then rows and columns k and ipiv_l[k]\nwere interchanged and D_l[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv_l[k] = ipiv_l[k-1] < 0 and uplo is upper (or ipiv_l[k]\n= ipiv_l[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv_l[k] (or rows and columns k+1 and -ipiv_l[k]) were interchanged\nand D_l[k-1,k-1] to D_l[k,k] (or D_l[k,k] to D_l[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, D_l is singular. D_l[i,i] is the first diagonal zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssytf2_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -10705,7 +11128,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTRF computes the factorization of a symmetric indefinite matrix \\f$A\\f$\nusing Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA = U D U^T & \\: \\text{or}\\\\\nA = L D L^T &\n\\end{array}\n\\f]\n\nwhere \\f$U\\f$ or \\f$L\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D(k)\\f$.\n\nSpecifically, \\f$U\\f$ and \\f$L\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU = P(n) U(n) \\cdots P(k) U(k) \\cdots & \\: \\text{and}\\\\\nL = P(1) L(1) \\cdots P(k) L(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D(k)\\f$, and \\f$P(k)\\f$ is a permutation matrix defined by\n\\f$ipiv[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D(k)\\f$, then \\f$U(k)\\f$\nand \\f$L(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D(k)\\f$ is stored in \\f$A[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D(k)\\f$ is stored in \\f$A[k-1,k-1]\\f$, \\f$A[k-1,k]\\f$,\nand \\f$A[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D(k)\\f$ is stored in \\f$A[k,k]\\f$, \\f$A[k+1,k]\\f$,\nand \\f$A[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrix A to be factored.\nOn exit, the block diagonal matrix D and the multipliers needed to\ncompute U or L.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe vector of pivot indices. 
Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv[k] > 0 then rows and columns k and ipiv[k]\nwere interchanged and D[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv[k] = ipiv[k-1] < 0 and uplo is upper (or ipiv[k]\n= ipiv[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv[k] (or rows and columns k+1 and -ipiv[k]) were interchanged\nand D[k-1,k-1] to D[k,k] (or D[k,k] to D[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, D is singular. D[i,i] is the first diagonal zero."]
+    #[doc = " @{\n\\brief SYTRF computes the factorization of a symmetric indefinite matrix \\f$A\\f$\nusing Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA = U D U^T & \\: \\text{or}\\\\\nA = L D L^T &\n\\end{array}\n\\f]\n\nwhere \\f$U\\f$ or \\f$L\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D_k\\f$.\n\nSpecifically, \\f$U\\f$ and \\f$L\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU = P(n) U(n) \\cdots P(k) U(k) \\cdots & \\: \\text{and}\\\\\nL = P(1) L(1) \\cdots P(k) L(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D_k\\f$, and \\f$P(k)\\f$ is a permutation matrix defined by\n\\f$ipiv[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D_k\\f$, then \\f$U(k)\\f$\nand \\f$L(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D_k\\f$ is stored in \\f$A[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D_k\\f$ is stored in \\f$A[k-1,k-1]\\f$, \\f$A[k-1,k]\\f$,\nand \\f$A[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D_k\\f$ is stored in \\f$A[k,k]\\f$, \\f$A[k+1,k]\\f$,\nand \\f$A[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrix A is stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of the matrix A.\n@param[inout]\nA           pointer to type. Array on the GPU of dimension lda*n.\nOn entry, the symmetric matrix A to be factored.\nOn exit, the block diagonal matrix D and the multipliers needed to\ncompute U or L.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of A.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe vector of pivot indices. 
Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv[k] > 0 then rows and columns k and ipiv[k]\nwere interchanged and D[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv[k] = ipiv[k-1] < 0 and uplo is upper (or ipiv[k]\n= ipiv[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv[k] (or rows and columns k+1 and -ipiv[k]) were interchanged\nand D[k-1,k-1] to D[k,k] (or D[k,k] to D[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, D is singular. D[i,i] is the first diagonal zero."]
     pub fn rocsolver_ssytrf(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -10754,7 +11177,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTRF_BATCHED computes the factorization of a batch of symmetric indefinite\nmatrices using Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA_j = U_j D_j U_j^T & \\: \\text{or}\\\\\nA_j = L_j D_j L_j^T &\n\\end{array}\n\\f]\n\nwhere \\f$U_j\\f$ or \\f$L_j\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D_j\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D_j(k)\\f$.\n\nSpecifically, \\f$U_j\\f$ and \\f$L_j\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU_j = P_j(n) U_j(n) \\cdots P_j(k) U_j(k) \\cdots & \\: \\text{and}\\\\\nL_j = P_j(1) L_j(1) \\cdots P_j(k) L_j(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D_j(k)\\f$, and \\f$P_j(k)\\f$ is a permutation matrix defined by\n\\f$ipiv_j[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D_j(k)\\f$, then \\f$U_j(k)\\f$\nand \\f$L_j(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU_j(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL_j(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A_j\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k-1,k-1]\\f$, \\f$A_j[k-1,k]\\f$,\nand \\f$A_j[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A_j\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k,k]\\f$, \\f$A_j[k+1,k]\\f$,\nand \\f$A_j[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A_j\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_j is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\\n\nOn entry, the symmetric matrices A_j to be factored.\nOn exit, the block diagonal matrices D_j and the multipliers needed to\ncompute U_j or L_j.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe vector of pivot indices. 
Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv_j[k] > 0 then rows and columns k and ipiv_j[k]\nwere interchanged and D_j[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv_j[k] = ipiv_j[k-1] < 0 and uplo is upper (or ipiv_j[k]\n= ipiv_j[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv_j[k] (or rows and columns k+1 and -ipiv_j[k]) were interchanged\nand D_j[k-1,k-1] to D_j[k,k] (or D_j[k,k] to D_j[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, D_j is singular. D_j[i,i] is the first diagonal zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYTRF_BATCHED computes the factorization of a batch of symmetric indefinite\nmatrices using Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA_l^{} = U_l^{} D_l^{} U_l^T & \\: \\text{or}\\\\\nA_l^{} = L_l^{} D_l^{} L_l^T &\n\\end{array}\n\\f]\n\nwhere \\f$U_l\\f$ or \\f$L_l\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D_l\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D_{kl}\\f$.\n\nSpecifically, \\f$U_l\\f$ and \\f$L_l\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU_l = P_l(n) U_l(n) \\cdots P_l(k) U_l(k) \\cdots & \\: \\text{and}\\\\\nL_l = P_l(1) L_l(1) \\cdots P_l(k) L_l(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D_{kl}\\f$, and \\f$P_l(k)\\f$ is a permutation matrix defined by\n\\f$ipiv_l[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D_{kl}\\f$, then \\f$U_l(k)\\f$\nand \\f$L_l(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU_l(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL_l(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A_l\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k-1,k-1]\\f$, \\f$A_l[k-1,k]\\f$,\nand \\f$A_l[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A_l\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k,k]\\f$, \\f$A_l[k+1,k]\\f$,\nand \\f$A_l[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A_l\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[inout]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n.\nOn entry, the symmetric matrices A_l to be factored.\nOn exit, the block diagonal matrices D_l and the multipliers needed to\ncompute U_l or L_l.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe vector of pivot indices. 
Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv_l[k] > 0 then rows and columns k and ipiv_l[k]\nwere interchanged and D_l[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv_l[k] = ipiv_l[k-1] < 0 and uplo is upper (or ipiv_l[k]\n= ipiv_l[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv_l[k] (or rows and columns k+1 and -ipiv_l[k]) were interchanged\nand D_l[k-1,k-1] to D_l[k,k] (or D_l[k,k] to D_l[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, D_l is singular. D_l[i,i] is the first diagonal zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssytrf_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -10811,7 +11234,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief SYTRF_STRIDED_BATCHED computes the factorization of a batch of symmetric indefinite\nmatrices using Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA_j = U_j D_j U_j^T & \\: \\text{or}\\\\\nA_j = L_j D_j L_j^T &\n\\end{array}\n\\f]\n\nwhere \\f$U_j\\f$ or \\f$L_j\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D_j\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D_j(k)\\f$.\n\nSpecifically, \\f$U_j\\f$ and \\f$L_j\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU_j = P_j(n) U_j(n) \\cdots P_j(k) U_j(k) \\cdots & \\: \\text{and}\\\\\nL_j = P_j(1) L_j(1) \\cdots P_j(k) L_j(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D_j(k)\\f$, and \\f$P_j(k)\\f$ is a permutation matrix defined by\n\\f$ipiv_j[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D_j(k)\\f$, then \\f$U_j(k)\\f$\nand \\f$L_j(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU_j(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL_j(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A_j\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k-1,k-1]\\f$, \\f$A_j[k-1,k]\\f$,\nand \\f$A_j[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A_j\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D_j(k)\\f$ is stored in \\f$A_j[k,k]\\f$, \\f$A_j[k+1,k]\\f$,\nand \\f$A_j[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A_j\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\\n\nSpecifies whether the upper or lower part of the matrices A_j are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_j is not used.\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows and columns of all matrices A_j in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nOn entry, the symmetric matrices A_j to be factored.\nOn exit, the block diagonal matrices D_j and the multipliers needed to\ncompute U_j or L_j.\n@param[in]\nlda         rocblas_int. lda >= n.\\n\nSpecifies the leading dimension of matrices A_j.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one matrix A_j to the next one A_(j+1).\nThere is no restriction for the value of strideA. 
Normal use case is strideA >= lda*n\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nThe vector of pivot indices. Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv_j[k] > 0 then rows and columns k and ipiv_j[k]\nwere interchanged and D_j[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv_j[k] = ipiv_j[k-1] < 0 and uplo is upper (or ipiv_j[k]\n= ipiv_j[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv_j[k] (or rows and columns k+1 and -ipiv_j[k]) were interchanged\nand D_j[k-1,k-1] to D_j[k,k] (or D_j[k,k] to D_j[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[in]\nstrideP     rocblas_stride.\\n\nStride from the start of one vector ipiv_j to the next one ipiv_(j+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of A_j.\nIf info[j] = i > 0, D_j is singular. D_j[i,i] is the first diagonal zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief SYTRF_STRIDED_BATCHED computes the factorization of a batch of symmetric indefinite\nmatrices using Bunch-Kaufman diagonal pivoting.\n\n\\details\n(This is the blocked version of the algorithm).\n\nThe factorization has the form\n\n\\f[\n\\begin{array}{cl}\nA_l^{} = U_l^{} D_l^{} U_l^T & \\: \\text{or}\\\\\nA_l^{} = L_l^{} D_l^{} L_l^T &\n\\end{array}\n\\f]\n\nwhere \\f$U_l\\f$ or \\f$L_l\\f$ is a product of permutation and unit upper/lower\ntriangular matrices (depending on the value of uplo), and \\f$D_l\\f$ is a symmetric\nblock diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks \\f$D_{kl}\\f$.\n\nSpecifically, \\f$U_l\\f$ and \\f$L_l\\f$ are computed as\n\n\\f[\n\\begin{array}{cl}\nU_l = P_l(n) U_l(n) \\cdots P_l(k) U_l(k) \\cdots & \\: \\text{and}\\\\\nL_l = P_l(1) L_l(1) \\cdots P_l(k) L_l(k) \\cdots &\n\\end{array}\n\\f]\n\nwhere \\f$k\\f$ decreases from \\f$n\\f$ to 1 (increases from 1 to \\f$n\\f$) in steps of 1 or 2,\ndepending on the order of block \\f$D_{kl}\\f$, and \\f$P_l(k)\\f$ is a permutation matrix defined by\n\\f$ipiv_l[k]\\f$. 
If we let \\f$s\\f$ denote the order of block \\f$D_{kl}\\f$, then \\f$U_l(k)\\f$\nand \\f$L_l(k)\\f$ are unit upper/lower triangular matrices defined as\n\n\\f[\nU_l(k) = \\left[ \\begin{array}{ccc}\nI_{k-s} & v & 0 \\\\\n0 & I_s & 0 \\\\\n0 & 0 & I_{n-k}\n\\end{array} \\right]\n\\f]\n\nand\n\n\\f[\nL_l(k) = \\left[ \\begin{array}{ccc}\nI_{k-1} & 0 & 0 \\\\\n0 & I_s & 0 \\\\\n0 & v & I_{n-k-s+1}\n\\end{array} \\right].\n\\f]\n\nIf \\f$s = 1\\f$, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k,k]\\f$ and \\f$v\\f$ is stored in the upper/lower\npart of column \\f$k\\f$ of \\f$A_l\\f$.\nIf \\f$s = 2\\f$ and uplo is upper, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k-1,k-1]\\f$, \\f$A_l[k-1,k]\\f$,\nand \\f$A_l[k,k]\\f$, and \\f$v\\f$ is stored in the upper parts of columns \\f$k-1\\f$ and \\f$k\\f$ of \\f$A_l\\f$.\nIf \\f$s = 2\\f$ and uplo is lower, then \\f$D_{kl}\\f$ is stored in \\f$A_l[k,k]\\f$, \\f$A_l[k+1,k]\\f$,\nand \\f$A_l[k+1,k+1]\\f$, and \\f$v\\f$ is stored in the lower parts of columns \\f$k\\f$ and \\f$k+1\\f$ of \\f$A_l\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nuplo        rocblas_fill.\nSpecifies whether the upper or lower part of the matrices A_l are stored.\nIf uplo indicates lower (or upper), then the upper (or lower)\npart of A_l is not used.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows and columns of all matrices A_l in the batch.\n@param[inout]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nOn entry, the symmetric matrices A_l to be factored.\nOn exit, the block diagonal matrices D_l and the multipliers needed to\ncompute U_l or L_l.\n@param[in]\nlda         rocblas_int. lda >= n.\nSpecifies the leading dimension of matrices A_l.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one matrix A_l to the next one A_(l+1).\nThere is no restriction for the value of strideA. 
Normal use case is strideA >= lda*n\n@param[out]\nipiv        pointer to rocblas_int. Array on the GPU of dimension n.\nThe vector of pivot indices. Elements of ipiv are 1-based indices.\nFor 1 <= k <= n, if ipiv_l[k] > 0 then rows and columns k and ipiv_l[k]\nwere interchanged and D_l[k,k] is a 1-by-1 diagonal block.\nIf, instead, ipiv_l[k] = ipiv_l[k-1] < 0 and uplo is upper (or ipiv_l[k]\n= ipiv_l[k+1] < 0 and uplo is lower), then rows and columns k-1 and\n-ipiv_l[k] (or rows and columns k+1 and -ipiv_l[k]) were interchanged\nand D_l[k-1,k-1] to D_l[k,k] (or D_l[k,k] to D_l[k+1,k+1]) is a 2-by-2\ndiagonal block.\n@param[in]\nstrideP     rocblas_stride.\nStride from the start of one vector ipiv_l to the next one ipiv_(l+1).\nThere is no restriction for the value of strideP. Normal use case is strideP >= n.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of A_l.\nIf info[l] = i > 0, D_l is singular. D_l[i,i] is the first diagonal zero.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_ssytrf_strided_batched(
         handle: rocblas_handle,
         uplo: rocblas_fill,
@@ -10872,7 +11295,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBLTTRF_NPVT computes the LU factorization of a block tridiagonal matrix without partial pivoting.\n\n\\details The LU factorization of a block tridiagonal matrix\n\n\\f[\nM = \\left[\\begin{array}{ccccc}\nB_1 & C_1\\\\\nA_1 & B_2 & C_2\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{n-2} & B_{n-1} & C_{n-1}\\\\\n&  &  & A_{n-1} & B_n\n\\end{array}\\right]\n\\f]\n\nwith \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, can be represented as\n\n\\f[\nM = \\left[\\begin{array}{cccc}\nL_1 \\\\\nA_1 & L_2\\\\\n& \\ddots & \\ddots \\\\\n&  & A_{n-1} & L_n\n\\end{array}\\right] \\left[\\begin{array}{cccc}\nI & U_1 \\\\\n& \\ddots & \\ddots \\\\\n&  & I & U_{n-1}\\\\\n&  &  & I\n\\end{array}\\right] = LU\n\\f]\n\nwhere the blocks \\f$L_i\\f$ and \\f$U_i\\f$ are also general blocks of size nb.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\\n\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\\n\nThe number of blocks along the diagonal of the matrix.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*nb*(nblocks-1).\\n\nContains the blocks A_i arranged one after the other.\n@param[in]\nlda         rocblas_int. lda >= nb.\\n\nSpecifies the leading dimension of blocks A_i.\n@param[inout]\nB           pointer to type. Array on the GPU of dimension ldb*nb*nblocks.\\n\nOn entry, contains the blocks B_i arranged one after the other.\nOn exit it is overwritten by blocks L_i in factorized form as returned by\n\\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\"\n@param[in]\nldb         rocblas_int. ldb >= nb.\\n\nSpecifies the leading dimension of blocks B_i.\n@param[inout]\nC           pointer to type. Array on the GPU of dimension ldc*nb*(nblocks-1).\\n\nOn entry, contains the blocks C_i arranged one after the other.\nOn exit it is overwritten by blocks U_i.\n@param[in]\nldc         rocblas_int. 
ldc >= nb.\\n\nSpecifies the leading dimension of blocks C_i.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\\n\nIf info = 0, successful exit.\nIf info = i > 0, the matrix is singular."]
+    #[doc = " @{\n\\brief GEBLTTRF_NPVT computes the LU factorization of a block tridiagonal matrix without partial pivoting.\n\n\\details The LU factorization of a block tridiagonal matrix\n\n\\f[\nM = \\left[\\begin{array}{ccccc}\nB_1 & C_1\\\\\nA_1 & B_2 & C_2\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{n-2} & B_{n-1} & C_{n-1}\\\\\n&  &  & A_{n-1} & B_n\n\\end{array}\\right]\n\\f]\n\nwith \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, can be represented as\n\n\\f[\nM = \\left[\\begin{array}{cccc}\nL_1 \\\\\nA_1 & L_2\\\\\n& \\ddots & \\ddots \\\\\n&  & A_{n-1} & L_n\n\\end{array}\\right] \\left[\\begin{array}{cccc}\nI & U_1 \\\\\n& \\ddots & \\ddots \\\\\n&  & I & U_{n-1}\\\\\n&  &  & I\n\\end{array}\\right] = LU\n\\f]\n\nwhere the blocks \\f$L_i\\f$ and \\f$U_i\\f$ are also general blocks of size nb.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\nThe number of blocks along the diagonal of the matrix.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*nb*(nblocks-1).\nContains the blocks A_i arranged one after the other.\n@param[in]\nlda         rocblas_int. lda >= nb.\nSpecifies the leading dimension of blocks A_i.\n@param[inout]\nB           pointer to type. Array on the GPU of dimension ldb*nb*nblocks.\nOn entry, contains the blocks B_i arranged one after the other.\nOn exit it is overwritten by blocks L_i in factorized form as returned by\n\\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\"\n@param[in]\nldb         rocblas_int. ldb >= nb.\nSpecifies the leading dimension of blocks B_i.\n@param[inout]\nC           pointer to type. Array on the GPU of dimension ldc*nb*(nblocks-1).\nOn entry, contains the blocks C_i arranged one after the other.\nOn exit it is overwritten by blocks U_i.\n@param[in]\nldc         rocblas_int. 
ldc >= nb.\nSpecifies the leading dimension of blocks C_i.\n@param[out]\ninfo        pointer to a rocblas_int on the GPU.\nIf info = 0, successful exit.\nIf info = i > 0, the matrix is singular."]
     pub fn rocsolver_sgeblttrf_npvt(
         handle: rocblas_handle,
         nb: rocblas_int,
@@ -10933,7 +11356,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBLTTRF_NPVT_BATCHED computes the LU factorization of a batch of block tridiagonal matrices without\npartial pivoting.\n\n\\details The LU factorization of a block tridiagonal matrix \\f$M_j\\f$ in the batch\n\n\\f[\nM_j = \\left[\\begin{array}{ccccc}\nB_{j1} & C_{j1}\\\\\nA_{j1} & B_{j2} & C_{j2}\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{j(n-2)} & B_{j(n-1)} & C_{j(n-1)}\\\\\n&  &  & A_{j(n-1)} & B_{jn}\n\\end{array}\\right]\n\\f]\n\nwith \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, can be represented as\n\n\\f[\nM_j = \\left[\\begin{array}{cccc}\nL_{j1} \\\\\nA_{j1} & L_{j2}\\\\\n& \\ddots & \\ddots \\\\\n&  & A_{j(n-1)} & L_{jn}\n\\end{array}\\right] \\left[\\begin{array}{cccc}\nI & U_{j1} \\\\\n& \\ddots & \\ddots \\\\\n&  & I & U_{j(n-1)}\\\\\n&  &  & I\n\\end{array}\\right] = L_jU_j\n\\f]\n\nwhere the blocks \\f$L_{ji}\\f$ and \\f$U_{ji}\\f$ are also general blocks of size nb.\n\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\\n\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\\n\nThe number of blocks along the diagonal of each matrix in the batch.\n@param[in]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension\nlda*nb*(nblocks-1).\\n\nContains the blocks A_{ji} arranged one after the other.\n@param[in]\nlda         rocblas_int. lda >= nb.\\n\nSpecifies the leading dimension of blocks A_{ji}.\n@param[inout]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension\nldb*nb*nblocks.\\n\nOn entry, contains the blocks B_{ji} arranged one after the other.\nOn exit it is overwritten by blocks L_{ji} in factorized form as returned by\n\\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\"\n@param[in]\nldb         rocblas_int. ldb >= nb.\\n\nSpecifies the leading dimension of blocks B_{ji}.\n@param[inout]\nC           array of pointers to type. 
Each pointer points to an array on the GPU of dimension\nldc*nb*(nblocks-1).\\n\nOn entry, contains the blocks C_{ji} arranged one after the other.\nOn exit it is overwritten by blocks U_{ji}.\n@param[in]\nldc         rocblas_int. ldc >= nb.\\n\nSpecifies the leading dimension of blocks C_{ji}.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of j-th batch instance.\nIf info[j] = i > 0, the j-th batch instance is singular.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEBLTTRF_NPVT_BATCHED computes the LU factorization of a batch of block tridiagonal matrices without\npartial pivoting.\n\n\\details The LU factorization of a block tridiagonal matrix \\f$M_l\\f$ in the batch\n\n\\f[\nM_l = \\left[\\begin{array}{ccccc}\nB_{l1} & C_{l1}\\\\\nA_{l1} & B_{l2} & C_{l2}\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{l(n-2)} & B_{l(n-1)} & C_{l(n-1)}\\\\\n&  &  & A_{l(n-1)} & B_{ln}\n\\end{array}\\right]\n\\f]\n\nwith \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, can be represented as\n\n\\f[\nM_l = \\left[\\begin{array}{cccc}\nL_{l1} \\\\\nA_{l1} & L_{l2}\\\\\n& \\ddots & \\ddots \\\\\n&  & A_{l(n-1)} & L_{ln}\n\\end{array}\\right] \\left[\\begin{array}{cccc}\nI & U_{l1} \\\\\n& \\ddots & \\ddots \\\\\n&  & I & U_{l(n-1)}\\\\\n&  &  & I\n\\end{array}\\right] = L_lU_l\n\\f]\n\nwhere the blocks \\f$L_{li}\\f$ and \\f$U_{li}\\f$ are also general blocks of size nb.\n\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\nThe number of blocks along the diagonal of each matrix in the batch.\n@param[in]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension\nlda*nb*(nblocks-1).\nContains the blocks A_{li} arranged one after the other.\n@param[in]\nlda         rocblas_int. lda >= nb.\nSpecifies the leading dimension of blocks A_{li}.\n@param[inout]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension\nldb*nb*nblocks.\nOn entry, contains the blocks B_{li} arranged one after the other.\nOn exit it is overwritten by blocks L_{li} in factorized form as returned by\n\\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\"\n@param[in]\nldb         rocblas_int. ldb >= nb.\nSpecifies the leading dimension of blocks B_{li}.\n@param[inout]\nC           array of pointers to type. 
Each pointer points to an array on the GPU of dimension\nldc*nb*(nblocks-1).\nOn entry, contains the blocks C_{li} arranged one after the other.\nOn exit it is overwritten by blocks U_{li}.\n@param[in]\nldc         rocblas_int. ldc >= nb.\nSpecifies the leading dimension of blocks C_{li}.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of l-th batch instance.\nIf info[l] = i > 0, the l-th batch instance is singular.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeblttrf_npvt_batched(
         handle: rocblas_handle,
         nb: rocblas_int,
@@ -10998,7 +11421,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBLTTRF_NPVT_STRIDED_BATCHED computes the LU factorization of a batch of block tridiagonal\nmatrices without partial pivoting.\n\n\\details The LU factorization of a block tridiagonal matrix \\f$M_j\\f$ in the batch\n\n\\f[\nM_j = \\left[\\begin{array}{ccccc}\nB_{j1} & C_{j1}\\\\\nA_{j1} & B_{j2} & C_{j2}\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{j(n-2)} & B_{j(n-1)} & C_{j(n-1)}\\\\\n&  &  & A_{j(n-1)} & B_{jn}\n\\end{array}\\right]\n\\f]\n\nwith \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, can be represented as\n\n\\f[\nM_j = \\left[\\begin{array}{cccc}\nL_{j1} \\\\\nA_{j1} & L_{j2}\\\\\n& \\ddots & \\ddots \\\\\n&  & A_{j(n-1)} & L_{jn}\n\\end{array}\\right] \\left[\\begin{array}{cccc}\nI & U_{j1} \\\\\n& \\ddots & \\ddots \\\\\n&  & I & U_{j(n-1)}\\\\\n&  &  & I\n\\end{array}\\right] = L_jU_j\n\\f]\n\nwhere the blocks \\f$L_{ji}\\f$ and \\f$U_{ji}\\f$ are also general blocks of size nb.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\\n\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\\n\nThe number of blocks along the diagonal of each matrix in the batch.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nContains the blocks A_{ji} arranged one after the other.\n@param[in]\nlda         rocblas_int. lda >= nb.\\n\nSpecifies the leading dimension of blocks A_{ji}.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one block A_{ji} to the same block in the next batch\ninstance A_{(j+1)i}.\nThere is no restriction for the value of strideA. Normal use case is strideA >=\nlda*nb*nblocks.\n@param[inout]\nB           pointer to type. 
Array on the GPU (the size depends on the value of strideB).\\n\nOn entry, contains the blocks B_{ji} arranged one after the other.\nOn exit it is overwritten by blocks L_{ji} in factorized form as returned by\n\\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\"\n@param[in]\nldb         rocblas_int. ldb >= nb.\\n\nSpecifies the leading dimension of matrix blocks B_{ji}.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one block B_{ji} to the same block in the next batch\ninstance B_{(j+1)i}.\nThere is no restriction for the value of strideB. Normal use case is strideB >=\nldb*nb*nblocks.\n@param[inout]\nC           pointer to type. Array on the GPU (the size depends on the value of strideC).\\n\nOn entry, contains the blocks C_{ji} arranged one after the other.\nOn exit it is overwritten by blocks U_{ji}.\n@param[in]\nldc         rocblas_int. ldc >= nb.\\n\nSpecifies the leading dimension of matrix blocks C_{ji}.\n@param[in]\nstrideC     rocblas_stride.\\n\nStride from the start of one block B_{ji} to the same block in the next batch\ninstance B_{(j+1)i}.\nThere is no restriction for the value of strideC. Normal use case is strideC >=\nldc*nb*nblocks.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\\n\nIf info[j] = 0, successful exit for factorization of j-th batch instance.\nIf info[j] = i > 0, the j-th batch instance is singular.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEBLTTRF_NPVT_STRIDED_BATCHED computes the LU factorization of a batch of block tridiagonal\nmatrices without partial pivoting.\n\n\\details The LU factorization of a block tridiagonal matrix \\f$M_l\\f$ in the batch\n\n\\f[\nM_l = \\left[\\begin{array}{ccccc}\nB_{l1} & C_{l1}\\\\\nA_{l1} & B_{l2} & C_{l2}\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{l(n-2)} & B_{l(n-1)} & C_{l(n-1)}\\\\\n&  &  & A_{l(n-1)} & B_{ln}\n\\end{array}\\right]\n\\f]\n\nwith \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, can be represented as\n\n\\f[\nM_l = \\left[\\begin{array}{cccc}\nL_{l1} \\\\\nA_{l1} & L_{l2}\\\\\n& \\ddots & \\ddots \\\\\n&  & A_{l(n-1)} & L_{ln}\n\\end{array}\\right] \\left[\\begin{array}{cccc}\nI & U_{l1} \\\\\n& \\ddots & \\ddots \\\\\n&  & I & U_{l(n-1)}\\\\\n&  &  & I\n\\end{array}\\right] = L_lU_l\n\\f]\n\nwhere the blocks \\f$L_{li}\\f$ and \\f$U_{li}\\f$ are also general blocks of size nb.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\nThe number of blocks along the diagonal of each matrix in the batch.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nContains the blocks A_{li} arranged one after the other.\n@param[in]\nlda         rocblas_int. lda >= nb.\nSpecifies the leading dimension of blocks A_{li}.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one block A_{li} to the same block in the next batch\ninstance A_{(l+1)i}.\nThere is no restriction for the value of strideA. Normal use case is strideA >=\nlda*nb*nblocks.\n@param[inout]\nB           pointer to type. 
Array on the GPU (the size depends on the value of strideB).\nOn entry, contains the blocks B_{li} arranged one after the other.\nOn exit it is overwritten by blocks L_{li} in factorized form as returned by\n\\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\"\n@param[in]\nldb         rocblas_int. ldb >= nb.\nSpecifies the leading dimension of matrix blocks B_{li}.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one block B_{li} to the same block in the next batch\ninstance B_{(l+1)i}.\nThere is no restriction for the value of strideB. Normal use case is strideB >=\nldb*nb*nblocks.\n@param[inout]\nC           pointer to type. Array on the GPU (the size depends on the value of strideC).\nOn entry, contains the blocks C_{li} arranged one after the other.\nOn exit it is overwritten by blocks U_{li}.\n@param[in]\nldc         rocblas_int. ldc >= nb.\nSpecifies the leading dimension of matrix blocks C_{li}.\n@param[in]\nstrideC     rocblas_stride.\nStride from the start of one block C_{li} to the same block in the next batch\ninstance C_{(l+1)i}.\nThere is no restriction for the value of strideC. Normal use case is strideC >=\nldc*nb*nblocks.\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of l-th batch instance.\nIf info[l] = i > 0, the l-th batch instance is singular.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeblttrf_npvt_strided_batched(
         handle: rocblas_handle,
         nb: rocblas_int,
@@ -11075,7 +11498,96 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBLTTRS_NPVT solves a system of linear equations given by a block tridiagonal matrix\nin its factorized form (without partial pivoting).\n\n\\details The linear system has the form\n\n\\f[\nMX = \\left[\\begin{array}{ccccc}\nB_1 & C_1\\\\\nA_1 & B_2 & C_2\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{n-2} & B_{n-1} & C_{n-1}\\\\\n&  &  & A_{n-1} & B_n\n\\end{array}\\right]\\left[\\begin{array}{c}\nX_1\\\\\nX_2\\\\\nX_3\\\\\n\\vdots\\\\\nX_n\n\\end{array}\\right]=\\left[\\begin{array}{c}\nR_1\\\\\nR_2\\\\\nR_3\\\\\n\\vdots\\\\\nR_n\n\\end{array}\\right]=R\n\\f]\n\nwhere matrix M has \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, and the right-hand-side\nblocks \\f$R_i\\f$ are general blocks of size nb-by-nrhs. The blocks of matrix M should be in\nthe factorized form as returned by \\ref rocsolver_sgeblttrf_npvt \"GEBLTTRF_NPVT\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\\n\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\\n\nThe number of blocks along the diagonal of the matrix.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns of blocks R_i.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*nb*(nblocks-1).\\n\nContains the blocks A_i as returned by \\ref rocsolver_sgeblttrf_npvt \"GEBLTTRF_NPVT\".\n@param[in]\nlda         rocblas_int. lda >= nb.\\n\nSpecifies the leading dimension of blocks A_i.\n@param[in]\nB           pointer to type. Array on the GPU of dimension ldb*nb*nblocks.\\n\nContains the blocks B_i as returned by \\ref rocsolver_sgeblttrf_npvt \"GEBLTTRF_NPVT\".\n@param[in]\nldb         rocblas_int. ldb >= nb.\\n\nSpecifies the leading dimension of blocks B_i.\n@param[in]\nC           pointer to type. 
Array on the GPU of dimension ldc*nb*(nblocks-1).\\n\nContains the blocks C_i as returned by \\ref rocsolver_sgeblttrf_npvt \"GEBLTTRF_NPVT\".\n@param[in]\nldc         rocblas_int. ldc >= nb.\\n\nSpecifies the leading dimension of blocks C_i.\n@param[inout]\nX           pointer to type. Array on the GPU of dimension ldx*nblocks*nrhs.\\n\nOn entry, X contains the right-hand-side blocks R_i. It is overwritten by solution\nvectors X_i on exit.\n@param[in]\nldx         rocblas_int. ldx >= nb.\\n\nSpecifies the leading dimension of blocks X_i."]
+    #[doc = " @{\n\\brief GEBLTTRF_NPVT_INTERLEAVED_BATCHED computes the LU factorization of a batch of block tridiagonal\nmatrices without partial pivoting.\n\n\\details The LU factorization of a block tridiagonal matrix \\f$M_l\\f$ in the batch\n\n\\f[\nM_l = \\left[\\begin{array}{ccccc}\nB_{l1} & C_{l1}\\\\\nA_{l1} & B_{l2} & C_{l2}\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{l(n-2)} & B_{l(n-1)} & C_{l(n-1)}\\\\\n&  &  & A_{l(n-1)} & B_{ln}\n\\end{array}\\right]\n\\f]\n\nwith \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, can be represented as\n\n\\f[\nM_l = \\left[\\begin{array}{cccc}\nL_{l1} \\\\\nA_{l1} & L_{l2}\\\\\n& \\ddots & \\ddots \\\\\n&  & A_{l(n-1)} & L_{ln}\n\\end{array}\\right] \\left[\\begin{array}{cccc}\nI & U_{l1} \\\\\n& \\ddots & \\ddots \\\\\n&  & I & U_{l(n-1)}\\\\\n&  &  & I\n\\end{array}\\right] = L_lU_l\n\\f]\n\nwhere the blocks \\f$L_{li}\\f$ and \\f$U_{li}\\f$ are also general blocks of size nb.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\nThe number of blocks along the diagonal of each matrix in the batch.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nContains the blocks A_{li} arranged one after the other.\n@param[in]\ninca        rocblas_int. inca > 0.\nStride from the start of one row of A_{li} to the next. Normal use cases are\ninca = 1 (strided batched case) or inca = batch_count (interleaved batched case).\n@param[in]\nlda         rocblas_int. lda >= inca * nb.\nSpecifies the leading dimension of blocks A_{li}, i.e. the stride from the start\nof one column of A_{li} to the next.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one block A_{li} to the same block in the next batch\ninstance A_{(l+1)i}.\nThere is no restriction for the value of strideA. 
Normal use cases are strideA >=\nlda*nb*nblocks (strided batched case) or strideA = 1 (interleaved batched case).\n@param[inout]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nOn entry, contains the blocks B_{li} arranged one after the other.\nOn exit it is overwritten by blocks L_{li} in factorized form as returned by\n\\ref rocsolver_sgetrf_npvt \"GETRF_NPVT\"\n@param[in]\nincb        rocblas_int. incb > 0.\nStride from the start of one row of B_{li} to the next. Normal use cases are\nincb = 1 (strided batched case) or incb = batch_count (interleaved batched case).\n@param[in]\nldb         rocblas_int. ldb >= incb * nb.\nSpecifies the leading dimension of blocks B_{li}, i.e. the stride from the start\nof one column of B_{li} to the next.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one block B_{li} to the same block in the next batch\ninstance B_{(l+1)i}.\nThere is no restriction for the value of strideB. Normal use cases are strideB >=\nldb*nb*nblocks (strided batched case) or strideB = 1 (interleaved batched case).\n@param[inout]\nC           pointer to type. Array on the GPU (the size depends on the value of strideC).\nOn entry, contains the blocks C_{li} arranged one after the other.\nOn exit it is overwritten by blocks U_{li}.\n@param[in]\nincc        rocblas_int. incc > 0.\nStride from the start of one row of C_{li} to the next. Normal use cases are\nincc = 1 (strided batched case) or incc = batch_count (interleaved batched case).\n@param[in]\nldc         rocblas_int. ldc >= incc * nb.\nSpecifies the leading dimension of blocks C_{li}, i.e. the stride from the start\nof one column of C_{li} to the next.\n@param[in]\nstrideC     rocblas_stride.\nStride from the start of one block C_{li} to the same block in the next batch\ninstance C_{(l+1)i}.\nThere is no restriction for the value of strideC. 
Normal use cases are strideC >=\nldc*nb*nblocks (strided batched case) or strideC = 1 (interleaved batched case).\n@param[out]\ninfo        pointer to rocblas_int. Array of batch_count integers on the GPU.\nIf info[l] = 0, successful exit for factorization of l-th batch instance.\nIf info[l] = i > 0, the l-th batch instance is singular.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
+    pub fn rocsolver_sgeblttrf_npvt_interleaved_batched(
+        handle: rocblas_handle,
+        nb: rocblas_int,
+        nblocks: rocblas_int,
+        A: *mut f32,
+        inca: rocblas_int,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        B: *mut f32,
+        incb: rocblas_int,
+        ldb: rocblas_int,
+        strideB: rocblas_stride,
+        C: *mut f32,
+        incc: rocblas_int,
+        ldc: rocblas_int,
+        strideC: rocblas_stride,
+        info: *mut rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of the f64 entry point for GEBLTTRF_NPVT_INTERLEAVED_BATCHED (block tridiagonal LU factorization without pivoting).
+    #[must_use] // discarding the returned rocblas_status triggers a compiler warning
+    pub fn rocsolver_dgeblttrf_npvt_interleaved_batched( // same parameter list as rocsolver_sgeblttrf_npvt_interleaved_batched but with f64 buffers; notes below follow that function's doc attribute (presumably one shared Doxygen group, TODO confirm)
+        handle: rocblas_handle,
+        nb: rocblas_int, // number of rows and columns of each block
+        nblocks: rocblas_int, // number of blocks along the diagonal of each matrix
+        A: *mut f64, // blocks A_{li}
+        inca: rocblas_int, // stride from one row of A_{li} to the next
+        lda: rocblas_int, // stride from one column of A_{li} to the next
+        strideA: rocblas_stride, // stride from a block of A to the same block in the next batch instance
+        B: *mut f64, // on entry blocks B_{li}; on exit blocks L_{li}
+        incb: rocblas_int, // row stride of B_{li}
+        ldb: rocblas_int, // column stride of B_{li}
+        strideB: rocblas_stride, // batch-instance stride of B
+        C: *mut f64, // on entry blocks C_{li}; on exit blocks U_{li}
+        incc: rocblas_int, // row stride of C_{li}
+        ldc: rocblas_int, // column stride of C_{li}
+        strideC: rocblas_stride, // batch-instance stride of C
+        info: *mut rocblas_int, // batch_count entries: 0 = success, i > 0 = that batch instance is singular
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of the rocblas_float_complex entry point for GEBLTTRF_NPVT_INTERLEAVED_BATCHED (block tridiagonal LU factorization without pivoting).
+    #[must_use] // discarding the returned rocblas_status triggers a compiler warning
+    pub fn rocsolver_cgeblttrf_npvt_interleaved_batched( // same parameter list as rocsolver_sgeblttrf_npvt_interleaved_batched but with rocblas_float_complex buffers; notes below follow that function's doc attribute (presumably one shared Doxygen group, TODO confirm)
+        handle: rocblas_handle,
+        nb: rocblas_int, // number of rows and columns of each block
+        nblocks: rocblas_int, // number of blocks along the diagonal of each matrix
+        A: *mut rocblas_float_complex, // blocks A_{li}
+        inca: rocblas_int, // stride from one row of A_{li} to the next
+        lda: rocblas_int, // stride from one column of A_{li} to the next
+        strideA: rocblas_stride, // stride from a block of A to the same block in the next batch instance
+        B: *mut rocblas_float_complex, // on entry blocks B_{li}; on exit blocks L_{li}
+        incb: rocblas_int, // row stride of B_{li}
+        ldb: rocblas_int, // column stride of B_{li}
+        strideB: rocblas_stride, // batch-instance stride of B
+        C: *mut rocblas_float_complex, // on entry blocks C_{li}; on exit blocks U_{li}
+        incc: rocblas_int, // row stride of C_{li}
+        ldc: rocblas_int, // column stride of C_{li}
+        strideC: rocblas_stride, // batch-instance stride of C
+        info: *mut rocblas_int, // batch_count entries: 0 = success, i > 0 = that batch instance is singular
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of the rocblas_double_complex entry point for GEBLTTRF_NPVT_INTERLEAVED_BATCHED (block tridiagonal LU factorization without pivoting).
+    #[must_use] // discarding the returned rocblas_status triggers a compiler warning
+    pub fn rocsolver_zgeblttrf_npvt_interleaved_batched( // same parameter list as rocsolver_sgeblttrf_npvt_interleaved_batched but with rocblas_double_complex buffers; notes below follow that function's doc attribute (presumably one shared Doxygen group, TODO confirm)
+        handle: rocblas_handle,
+        nb: rocblas_int, // number of rows and columns of each block
+        nblocks: rocblas_int, // number of blocks along the diagonal of each matrix
+        A: *mut rocblas_double_complex, // blocks A_{li}
+        inca: rocblas_int, // stride from one row of A_{li} to the next
+        lda: rocblas_int, // stride from one column of A_{li} to the next
+        strideA: rocblas_stride, // stride from a block of A to the same block in the next batch instance
+        B: *mut rocblas_double_complex, // on entry blocks B_{li}; on exit blocks L_{li}
+        incb: rocblas_int, // row stride of B_{li}
+        ldb: rocblas_int, // column stride of B_{li}
+        strideB: rocblas_stride, // batch-instance stride of B
+        C: *mut rocblas_double_complex, // on entry blocks C_{li}; on exit blocks U_{li}
+        incc: rocblas_int, // row stride of C_{li}
+        ldc: rocblas_int, // column stride of C_{li}
+        strideC: rocblas_stride, // batch-instance stride of C
+        info: *mut rocblas_int, // batch_count entries: 0 = success, i > 0 = that batch instance is singular
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief GEBLTTRS_NPVT solves a system of linear equations given by a block tridiagonal matrix\nin its factorized form (without partial pivoting).\n\n\\details The linear system has the form\n\n\\f[\nMX = \\left[\\begin{array}{ccccc}\nB_1 & C_1\\\\\nA_1 & B_2 & C_2\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{n-2} & B_{n-1} & C_{n-1}\\\\\n&  &  & A_{n-1} & B_n\n\\end{array}\\right]\\left[\\begin{array}{c}\nX_1\\\\\nX_2\\\\\nX_3\\\\\n\\vdots\\\\\nX_n\n\\end{array}\\right]=\\left[\\begin{array}{c}\nR_1\\\\\nR_2\\\\\nR_3\\\\\n\\vdots\\\\\nR_n\n\\end{array}\\right]=R\n\\f]\n\nwhere matrix M has \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, and the right-hand-side\nblocks \\f$R_i\\f$ are general blocks of size nb-by-nrhs. The blocks of matrix M should be in\nthe factorized form as returned by \\ref rocsolver_sgeblttrf_npvt \"GEBLTTRF_NPVT\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\nThe number of blocks along the diagonal of the matrix.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns of blocks R_i.\n@param[in]\nA           pointer to type. Array on the GPU of dimension lda*nb*(nblocks-1).\nContains the blocks A_i as returned by \\ref rocsolver_sgeblttrf_npvt \"GEBLTTRF_NPVT\".\n@param[in]\nlda         rocblas_int. lda >= nb.\nSpecifies the leading dimension of blocks A_i.\n@param[in]\nB           pointer to type. Array on the GPU of dimension ldb*nb*nblocks.\nContains the blocks B_i as returned by \\ref rocsolver_sgeblttrf_npvt \"GEBLTTRF_NPVT\".\n@param[in]\nldb         rocblas_int. ldb >= nb.\nSpecifies the leading dimension of blocks B_i.\n@param[in]\nC           pointer to type. 
Array on the GPU of dimension ldc*nb*(nblocks-1).\nContains the blocks C_i as returned by \\ref rocsolver_sgeblttrf_npvt \"GEBLTTRF_NPVT\".\n@param[in]\nldc         rocblas_int. ldc >= nb.\nSpecifies the leading dimension of blocks C_i.\n@param[inout]\nX           pointer to type. Array on the GPU of dimension ldx*nblocks*nrhs.\nOn entry, X contains the right-hand-side blocks R_i. It is overwritten by solution\nvectors X_i on exit.\n@param[in]\nldx         rocblas_int. ldx >= nb.\nSpecifies the leading dimension of blocks X_i."]
     pub fn rocsolver_sgeblttrs_npvt(
         handle: rocblas_handle,
         nb: rocblas_int,
@@ -11144,7 +11656,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBLTTRS_NPVT_BATCHED solves a batch of system of linear equations given by block tridiagonal\nmatrices in its factorized form (without partial pivoting).\n\n\\details Each linear system has the form\n\n\\f[\nM_jX_j = \\left[\\begin{array}{ccccc}\nB_{j1} & C_{j1}\\\\\nA_{j1} & B_{j2} & C_{j2}\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{j(n-2)} & B_{j(n-1)} & C_{j(n-1)}\\\\\n&  &  & A_{j(n-1)} & B_{jn}\n\\end{array}\\right]\\left[\\begin{array}{c}\nX_{j1}\\\\\nX_{j2}\\\\\nX_{j3}\\\\\n\\vdots\\\\\nX_{jn}\n\\end{array}\\right]=\\left[\\begin{array}{c}\nR_{j1}\\\\\nR_{j2}\\\\\nR_{j3}\\\\\n\\vdots\\\\\nR_{jn}\n\\end{array}\\right]=R_j\n\\f]\n\nwhere matrix \\f$M_j\\f$ has \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, and the right-hand-side\nblocks \\f$R_{ji}\\f$ are general blocks of size nb-by-nrhs. The blocks of matrix \\f$M_j\\f$ should be in\nthe factorized form as returned by \\ref rocsolver_sgeblttrf_npvt_batched \"GEBLTTRF_NPVT_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\\n\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\\n\nThe number of blocks along the diagonal of each matrix in the batch.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns of blocks R_{ji}.\n@param[in]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension\nlda*nb*(nblocks-1).\\n\nContains the blocks A_{ji} as returned by \\ref rocsolver_sgeblttrf_npvt_batched \"GEBLTTRF_NPVT_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= nb.\\n\nSpecifies the leading dimension of blocks A_{ji}.\n@param[in]\nB           array of pointers to type. 
Each pointer points to an array on the GPU of dimension\nlda*nb*nblocks.\\n\nContains the blocks B_{ji} as returned by \\ref rocsolver_sgeblttrf_npvt_batched \"GEBLTTRF_NPVT_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= nb.\\n\nSpecifies the leading dimension of blocks B_{ji}.\n@param[in]\nC           array of pointers to type. Each pointer points to an array on the GPU of dimension\nldc*nb*(nblocks-1).\\n\nContains the blocks C_{ji} as returned by \\ref rocsolver_sgeblttrf_npvt_batched \"GEBLTTRF_NPVT_BATCHED\".\n@param[in]\nldc         rocblas_int. ldc >= nb.\\n\nSpecifies the leading dimension of blocks C_{ji}.\n@param[inout]\nX           array of pointers to type. Each pointer points to an array on the GPU of dimension\nldx*nblocks*nrhs.\\n\nOn entry, X contains the right-hand-side blocks R_{ji}. It is overwritten by solution\nvectors X_{ji} on exit.\n@param[in]\nldx         rocblas_int. ldx >= nb.\\n\nSpecifies the leading dimension of blocks X_{ji}.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEBLTTRS_NPVT_BATCHED solves a batch of system of linear equations given by block tridiagonal\nmatrices in its factorized form (without partial pivoting).\n\n\\details Each linear system has the form\n\n\\f[\nM_lX_l = \\left[\\begin{array}{ccccc}\nB_{l1} & C_{l1}\\\\\nA_{l1} & B_{l2} & C_{l2}\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{l(n-2)} & B_{l(n-1)} & C_{l(n-1)}\\\\\n&  &  & A_{l(n-1)} & B_{ln}\n\\end{array}\\right]\\left[\\begin{array}{c}\nX_{l1}\\\\\nX_{l2}\\\\\nX_{l3}\\\\\n\\vdots\\\\\nX_{ln}\n\\end{array}\\right]=\\left[\\begin{array}{c}\nR_{l1}\\\\\nR_{l2}\\\\\nR_{l3}\\\\\n\\vdots\\\\\nR_{ln}\n\\end{array}\\right]=R_l\n\\f]\n\nwhere matrix \\f$M_l\\f$ has \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, and the right-hand-side\nblocks \\f$R_{li}\\f$ are general blocks of size nb-by-nrhs. The blocks of matrix \\f$M_l\\f$ should be in\nthe factorized form as returned by \\ref rocsolver_sgeblttrf_npvt_batched \"GEBLTTRF_NPVT_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\nThe number of blocks along the diagonal of each matrix in the batch.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns of blocks R_{li}.\n@param[in]\nA           array of pointers to type. Each pointer points to an array on the GPU of dimension\nlda*nb*(nblocks-1).\nContains the blocks A_{li} as returned by \\ref rocsolver_sgeblttrf_npvt_batched \"GEBLTTRF_NPVT_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= nb.\nSpecifies the leading dimension of blocks A_{li}.\n@param[in]\nB           array of pointers to type. Each pointer points to an array on the GPU of dimension\nldb*nb*nblocks.\nContains the blocks B_{li} as returned by \\ref rocsolver_sgeblttrf_npvt_batched \"GEBLTTRF_NPVT_BATCHED\".\n@param[in]\nldb         rocblas_int. 
ldb >= nb.\nSpecifies the leading dimension of blocks B_{li}.\n@param[in]\nC           array of pointers to type. Each pointer points to an array on the GPU of dimension\nldc*nb*(nblocks-1).\nContains the blocks C_{li} as returned by \\ref rocsolver_sgeblttrf_npvt_batched \"GEBLTTRF_NPVT_BATCHED\".\n@param[in]\nldc         rocblas_int. ldc >= nb.\nSpecifies the leading dimension of blocks C_{li}.\n@param[inout]\nX           array of pointers to type. Each pointer points to an array on the GPU of dimension\nldx*nblocks*nrhs.\nOn entry, X contains the right-hand-side blocks R_{li}. It is overwritten by solution\nvectors X_{li} on exit.\n@param[in]\nldx         rocblas_int. ldx >= nb.\nSpecifies the leading dimension of blocks X_{li}.\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeblttrs_npvt_batched(
         handle: rocblas_handle,
         nb: rocblas_int,
@@ -11217,7 +11729,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief GEBLTTRS_NPVT_STRIDED_BATCHED solves a batch of system of linear equations given by block\ntridiagonal matrices in its factorized form (without partial pivoting).\n\n\\details Each linear system has the form\n\n\\f[\nM_jX_j = \\left[\\begin{array}{ccccc}\nB_{j1} & C_{j1}\\\\\nA_{j1} & B_{j2} & C_{j2}\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{j(n-2)} & B_{j(n-1)} & C_{j(n-1)}\\\\\n&  &  & A_{j(n-1)} & B_{jn}\n\\end{array}\\right]\\left[\\begin{array}{c}\nX_{j1}\\\\\nX_{j2}\\\\\nX_{j3}\\\\\n\\vdots\\\\\nX_{jn}\n\\end{array}\\right]=\\left[\\begin{array}{c}\nR_{j1}\\\\\nR_{j2}\\\\\nR_{j3}\\\\\n\\vdots\\\\\nR_{jn}\n\\end{array}\\right]=R_j\n\\f]\n\nwhere matrix \\f$M_j\\f$ has \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, and the right-hand-side\nblocks \\f$R_{ji}\\f$ are general blocks of size nb-by-nrhs. The blocks of matrix \\f$M_j\\f$ should be in\nthe factorized form as returned by \\ref rocsolver_sgeblttrf_npvt_strided_batched \"GEBLTTRF_NPVT_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\\n\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\\n\nThe number of blocks along the diagonal of each matrix in the batch.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e., the number of columns of blocks R_{ji}.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\\n\nContains the blocks A_{ji} as returned by \\ref rocsolver_sgeblttrf_npvt_strided_batched \"GEBLTTRF_NPVT_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= nb.\\n\nSpecifies the leading dimension of blocks A_{ji}.\n@param[in]\nstrideA     rocblas_stride.\\n\nStride from the start of one block A_{ji} to the same block in the next batch\ninstance A_{(j+1)i}.\nThere is no restriction for the value of strideA. 
Normal use case is strideA >=\nlda*nb*nblocks\n@param[in]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\\n\nContains the blocks B_{ji} as returned by \\ref rocsolver_sgeblttrf_npvt_strided_batched \"GEBLTTRF_NPVT_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= nb.\\n\nSpecifies the leading dimension of blocks B_{ji}.\n@param[in]\nstrideB     rocblas_stride.\\n\nStride from the start of one block B_{ji} to the same block in the next batch\ninstance B_{(j+1)i}.\nThere is no restriction for the value of strideB. Normal use case is strideB >=\nldb*nb*nblocks\n@param[in]\nC           pointer to type. Array on the GPU (the size depends on the value of strideC).\\n\nContains the blocks C_{ji} as returned by \\ref rocsolver_sgeblttrf_npvt_strided_batched \"GEBLTTRF_NPVT_STRIDED_BATCHED\".\n@param[in]\nldc         rocblas_int. ldc >= nb.\\n\nSpecifies the leading dimension of blocks C_{ji}.\n@param[in]\nstrideC     rocblas_stride.\\n\nStride from the start of one block C_{ji} to the same block in the next batch\ninstance C_{(j+1)i}.\nThere is no restriction for the value of strideC. Normal use case is strideC >=\nldc*nb*nblocks\n@param[inout]\nX           pointer to type. Array on the GPU (the size depends on the value of strideX).\\n\nOn entry, X contains the right-hand-side blocks R_{ji}. It is overwritten by solution\nvectors X_{ji} on exit.\n@param[in]\nldx         rocblas_int. ldx >= nb.\\n\nSpecifies the leading dimension of blocks X_{ji}.\n@param[in]\nstrideX     rocblas_stride.\\n\nStride from the start of one block X_{ji} to the same block in the next batch\ninstance X_{(j+1)i}.\nThere is no restriction for the value of strideX. Normal use case is strideX >=\nldx*nblocks*nrhs\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\\n\nNumber of matrices in the batch."]
+    #[doc = " @{\n\\brief GEBLTTRS_NPVT_STRIDED_BATCHED solves a batch of system of linear equations given by block\ntridiagonal matrices in its factorized form (without partial pivoting).\n\n\\details Each linear system has the form\n\n\\f[\nM_lX_l = \\left[\\begin{array}{ccccc}\nB_{l1} & C_{l1}\\\\\nA_{l1} & B_{l2} & C_{l2}\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{l(n-2)} & B_{l(n-1)} & C_{l(n-1)}\\\\\n&  &  & A_{l(n-1)} & B_{ln}\n\\end{array}\\right]\\left[\\begin{array}{c}\nX_{l1}\\\\\nX_{l2}\\\\\nX_{l3}\\\\\n\\vdots\\\\\nX_{ln}\n\\end{array}\\right]=\\left[\\begin{array}{c}\nR_{l1}\\\\\nR_{l2}\\\\\nR_{l3}\\\\\n\\vdots\\\\\nR_{ln}\n\\end{array}\\right]=R_l\n\\f]\n\nwhere matrix \\f$M_l\\f$ has \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, and the right-hand-side\nblocks \\f$R_{li}\\f$ are general blocks of size nb-by-nrhs. The blocks of matrix \\f$M_l\\f$ should be in\nthe factorized form as returned by \\ref rocsolver_sgeblttrf_npvt_strided_batched \"GEBLTTRF_NPVT_STRIDED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\nThe number of blocks along the diagonal of each matrix in the batch.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns of blocks R_{li}.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nContains the blocks A_{li} as returned by \\ref rocsolver_sgeblttrf_npvt_strided_batched \"GEBLTTRF_NPVT_STRIDED_BATCHED\".\n@param[in]\nlda         rocblas_int. lda >= nb.\nSpecifies the leading dimension of blocks A_{li}.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one block A_{li} to the same block in the next batch\ninstance A_{(l+1)i}.\nThere is no restriction for the value of strideA. 
Normal use case is strideA >=\nlda*nb*nblocks\n@param[in]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nContains the blocks B_{li} as returned by \\ref rocsolver_sgeblttrf_npvt_strided_batched \"GEBLTTRF_NPVT_STRIDED_BATCHED\".\n@param[in]\nldb         rocblas_int. ldb >= nb.\nSpecifies the leading dimension of blocks B_{li}.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one block B_{li} to the same block in the next batch\ninstance B_{(l+1)i}.\nThere is no restriction for the value of strideB. Normal use case is strideB >=\nldb*nb*nblocks\n@param[in]\nC           pointer to type. Array on the GPU (the size depends on the value of strideC).\nContains the blocks C_{li} as returned by \\ref rocsolver_sgeblttrf_npvt_strided_batched \"GEBLTTRF_NPVT_STRIDED_BATCHED\".\n@param[in]\nldc         rocblas_int. ldc >= nb.\nSpecifies the leading dimension of blocks C_{li}.\n@param[in]\nstrideC     rocblas_stride.\nStride from the start of one block C_{li} to the same block in the next batch\ninstance C_{(l+1)i}.\nThere is no restriction for the value of strideC. Normal use case is strideC >=\nldc*nb*nblocks\n@param[inout]\nX           pointer to type. Array on the GPU (the size depends on the value of strideX).\nOn entry, X contains the right-hand-side blocks R_{li}. It is overwritten by solution\nvectors X_{li} on exit.\n@param[in]\nldx         rocblas_int. ldx >= nb.\nSpecifies the leading dimension of blocks X_{li}.\n@param[in]\nstrideX     rocblas_stride.\nStride from the start of one block X_{li} to the same block in the next batch\ninstance X_{(l+1)i}.\nThere is no restriction for the value of strideX. Normal use case is strideX >=\nldx*nblocks*nrhs\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
     pub fn rocsolver_sgeblttrs_npvt_strided_batched(
         handle: rocblas_handle,
         nb: rocblas_int,
@@ -11306,7 +11818,112 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\brief CREATE_RFINFO initializes the structure rfinfo, required by the re-factorization functions\n\\ref rocsolver_scsrrf_refactlu \"CSRRF_REFACTLU\" and \\ref rocsolver_scsrrf_solve \"CSRRF_SOLVE\",\nthat contains the meta data and descriptors of the involved matrices.\n\n\\details\n@param[out]\nrfinfo      #rocsolver_rfinfo.\\n\nThe pointer to the rfinfo struct to be initialized.\n@param[in]\nhandle      rocblas_handle.\\n"]
+    #[doc = " @{\n\\brief GEBLTTRS_NPVT_INTERLEAVED_BATCHED solves a batch of system of linear equations given by block\ntridiagonal matrices in its factorized form (without partial pivoting).\n\n\\details Each linear system has the form\n\n\\f[\nM_lX_l = \\left[\\begin{array}{ccccc}\nB_{l1} & C_{l1}\\\\\nA_{l1} & B_{l2} & C_{l2}\\\\\n& \\ddots & \\ddots & \\ddots \\\\\n&  & A_{l(n-2)} & B_{l(n-1)} & C_{l(n-1)}\\\\\n&  &  & A_{l(n-1)} & B_{ln}\n\\end{array}\\right]\\left[\\begin{array}{c}\nX_{l1}\\\\\nX_{l2}\\\\\nX_{l3}\\\\\n\\vdots\\\\\nX_{ln}\n\\end{array}\\right]=\\left[\\begin{array}{c}\nR_{l1}\\\\\nR_{l2}\\\\\nR_{l3}\\\\\n\\vdots\\\\\nR_{ln}\n\\end{array}\\right]=R_l\n\\f]\n\nwhere matrix \\f$M_l\\f$ has \\f$n = \\mathrm{nblocks}\\f$ diagonal blocks of size nb, and the right-hand-side\nblocks \\f$R_{li}\\f$ are general blocks of size nb-by-nrhs. The blocks of matrix \\f$M_l\\f$ should be in\nthe factorized form as returned by \\ref rocsolver_sgeblttrf_npvt_interleaved_batched \"GEBLTTRF_NPVT_INTERLEAVED_BATCHED\".\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nnb          rocblas_int. nb >= 0.\nThe number of rows and columns of each block.\n@param[in]\nnblocks     rocblas_int. nblocks >= 0.\nThe number of blocks along the diagonal of each matrix in the batch.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e., the number of columns of blocks R_{li}.\n@param[in]\nA           pointer to type. Array on the GPU (the size depends on the value of strideA).\nContains the blocks A_{li} as returned by \\ref rocsolver_sgeblttrf_npvt_interleaved_batched \"GEBLTTRF_NPVT_INTERLEAVED_BATCHED\".\n@param[in]\ninca        rocblas_int. inca > 0.\nStride from the start of one row of A_{li} to the next. Normal use cases are\ninca = 1 (strided batched case) or inca = batch_count (interleaved batched case).\n@param[in]\nlda         rocblas_int. lda >= inca * nb.\nSpecifies the leading dimension of blocks A_{li}, i.e. 
the stride from the start\nof one column of A_{li} to the next.\n@param[in]\nstrideA     rocblas_stride.\nStride from the start of one block A_{li} to the same block in the next batch\ninstance A_{(l+1)i}.\nThere is no restriction for the value of strideA. Normal use cases are strideA >=\nlda*nb*nblocks (strided batched case) or strideA = 1 (interleaved batched case).\n@param[in]\nB           pointer to type. Array on the GPU (the size depends on the value of strideB).\nContains the blocks B_{li} as returned by \\ref rocsolver_sgeblttrf_npvt_interleaved_batched \"GEBLTTRF_NPVT_INTERLEAVED_BATCHED\".\n@param[in]\nincb        rocblas_int. incb > 0.\nStride from the start of one row of B_{li} to the next. Normal use cases are\nincb = 1 (strided batched case) or incb = batch_count (interleaved batched case).\n@param[in]\nldb         rocblas_int. ldb >= incb * nb.\nSpecifies the leading dimension of blocks B_{li}, i.e. the stride from the start\nof one column of B_{li} to the next.\n@param[in]\nstrideB     rocblas_stride.\nStride from the start of one block B_{li} to the same block in the next batch\ninstance B_{(l+1)i}.\nThere is no restriction for the value of strideB. Normal use cases are strideB >=\nldb*nb*nblocks (strided batched case) or strideB = 1 (interleaved batched case).\n@param[in]\nC           pointer to type. Array on the GPU (the size depends on the value of strideC).\nContains the blocks C_{li} as returned by \\ref rocsolver_sgeblttrf_npvt_interleaved_batched \"GEBLTTRF_NPVT_INTERLEAVED_BATCHED\".\n@param[in]\nincc        rocblas_int. incc > 0.\nStride from the start of one row of C_{li} to the next. Normal use cases are\nincc = 1 (strided batched case) or incc = batch_count (interleaved batched case).\n@param[in]\nldc         rocblas_int. ldc >= incc * nb.\nSpecifies the leading dimension of blocks C_{li}, i.e. 
the stride from the start\nof one column of C_{li} to the next.\n@param[in]\nstrideC     rocblas_stride.\nStride from the start of one block C_{li} to the same block in the next batch\ninstance C_{(l+1)i}.\nThere is no restriction for the value of strideC. Normal use cases are strideC >=\nldc*nb*nblocks (strided batched case) or strideC = 1 (interleaved batched case).\n@param[inout]\nX           pointer to type. Array on the GPU (the size depends on the value of strideX).\nOn entry, X contains the right-hand-side blocks R_{li}. It is overwritten by solution\nvectors X_{li} on exit.\n@param[in]\nincx        rocblas_int. incx > 0.\nStride from the start of one row of X_{li} to the next. Normal use cases are\nincx = 1 (strided batched case) or incx = batch_count (interleaved batched case).\n@param[in]\nldx         rocblas_int. ldx >= incx * nb.\nSpecifies the leading dimension of blocks X_{li}, i.e. the stride from the start\nof one column of X_{li} to the next.\n@param[in]\nstrideX     rocblas_stride.\nStride from the start of one block X_{li} to the same block in the next batch\ninstance X_{(l+1)i}.\nThere is no restriction for the value of strideX. Normal use cases are strideX >=\nldx*nrhs*nblocks (strided batched case) or strideX = 1 (interleaved batched case).\n@param[in]\nbatch_count rocblas_int. batch_count >= 0.\nNumber of matrices in the batch."]
+    pub fn rocsolver_sgeblttrs_npvt_interleaved_batched(
+        handle: rocblas_handle,
+        nb: rocblas_int,
+        nblocks: rocblas_int,
+        nrhs: rocblas_int,
+        A: *mut f32,
+        inca: rocblas_int,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        B: *mut f32,
+        incb: rocblas_int,
+        ldb: rocblas_int,
+        strideB: rocblas_stride,
+        C: *mut f32,
+        incc: rocblas_int,
+        ldc: rocblas_int,
+        strideC: rocblas_stride,
+        X: *mut f32,
+        incx: rocblas_int,
+        ldx: rocblas_int,
+        strideX: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_dgeblttrs_npvt_interleaved_batched(
+        handle: rocblas_handle,
+        nb: rocblas_int,
+        nblocks: rocblas_int,
+        nrhs: rocblas_int,
+        A: *mut f64,
+        inca: rocblas_int,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        B: *mut f64,
+        incb: rocblas_int,
+        ldb: rocblas_int,
+        strideB: rocblas_stride,
+        C: *mut f64,
+        incc: rocblas_int,
+        ldc: rocblas_int,
+        strideC: rocblas_stride,
+        X: *mut f64,
+        incx: rocblas_int,
+        ldx: rocblas_int,
+        strideX: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_cgeblttrs_npvt_interleaved_batched(
+        handle: rocblas_handle,
+        nb: rocblas_int,
+        nblocks: rocblas_int,
+        nrhs: rocblas_int,
+        A: *mut rocblas_float_complex,
+        inca: rocblas_int,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        B: *mut rocblas_float_complex,
+        incb: rocblas_int,
+        ldb: rocblas_int,
+        strideB: rocblas_stride,
+        C: *mut rocblas_float_complex,
+        incc: rocblas_int,
+        ldc: rocblas_int,
+        strideC: rocblas_stride,
+        X: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        ldx: rocblas_int,
+        strideX: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_zgeblttrs_npvt_interleaved_batched(
+        handle: rocblas_handle,
+        nb: rocblas_int,
+        nblocks: rocblas_int,
+        nrhs: rocblas_int,
+        A: *mut rocblas_double_complex,
+        inca: rocblas_int,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        B: *mut rocblas_double_complex,
+        incb: rocblas_int,
+        ldb: rocblas_int,
+        strideB: rocblas_stride,
+        C: *mut rocblas_double_complex,
+        incc: rocblas_int,
+        ldc: rocblas_int,
+        strideC: rocblas_stride,
+        X: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        ldx: rocblas_int,
+        strideX: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief CREATE_RFINFO initializes the structure rfinfo that contains the meta data and descriptors of the involved matrices\nrequired by the re-factorization functions\n\\ref rocsolver_scsrrf_refactlu \"CSRRF_REFACTLU\" and \\ref rocsolver_scsrrf_refactchol \"CSRRF_REFACTCHOL\", and\nby the direct solver \\ref rocsolver_scsrrf_solve \"CSRRF_SOLVE\".\n\n\\details\n@param[out]\nrfinfo      #rocsolver_rfinfo.\nThe pointer to the rfinfo struct to be initialized.\n@param[in]\nhandle      rocblas_handle."]
     pub fn rocsolver_create_rfinfo(
         rfinfo: *mut rocsolver_rfinfo,
         handle: rocblas_handle,
@@ -11314,12 +11931,28 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\brief DESTROY_RFINFO destroys the structure rfinfo used by the re-factorization functions\n\\ref rocsolver_scsrrf_refactlu \"CSRRF_REFACTLU\" and \\ref rocsolver_scsrrf_solve \"CSRRF_SOLVE\".\n\n\\details\n@param[in]\nrfinfo      #rocsolver_rfinfo.\\n\nThe rfinfo struct to be destroyed."]
+    #[doc = " \\brief DESTROY_RFINFO destroys the structure rfinfo used by the re-factorization functions\n\\ref rocsolver_scsrrf_refactlu \"CSRRF_REFACTLU\" and \\ref rocsolver_scsrrf_refactchol \"CSRRF_REFACTCHOL\", and\nby the direct solver \\ref rocsolver_scsrrf_solve \"CSRRF_SOLVE\".\n\n\\details\n@param[in]\nrfinfo      #rocsolver_rfinfo.\nThe rfinfo struct to be destroyed."]
     pub fn rocsolver_destroy_rfinfo(rfinfo: rocsolver_rfinfo) -> rocblas_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief CSRRF_SUMLU bundles the factors \\f$L\\f$ and \\f$U\\f$, associated with the LU factorization\nof a sparse matrix \\f$A\\f$, into a single sparse matrix \\f$T=(L-I)+U\\f$.\n\n\\details Factor \\f$L\\f$ is a sparse lower triangular matrix with unit diagonal elements, and\n\\f$U\\f$ is a sparse upper triangular matrix. The resulting sparse matrix \\f$T\\f$ combines both\nsparse factors without storing the unit diagonal; in other words, the number of non-zero\nelements of T, nnzT, is given by nnzT = nnzL - n + nnzU.\n\n@param[in]\nhandle      rocblas_handle.\\n\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows (and columns) of matrix A.\n@param[in]\nnnzL        rocblas_int. nnzL >= n.\\n\nThe number of non-zero elements in L.\n@param[in]\nptrL        pointer to rocblas_int. Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indL and valL.\nThe last element of ptrL is equal to nnzL.\n@param[in]\nindL        pointer to rocblas_int. Array on the GPU of dimension nnzL.\\n\nIt contains the column indices of the non-zero elements of L. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalL        pointer to type. Array on the GPU of dimension nnzL.\\n\nThe values of the non-zero elements of L.\n@param[in]\nnnzU        rocblas_int. nnzU >= 0.\\n\nThe number of non-zero elements in U.\n@param[in]\nptrU        pointer to rocblas_int. Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indU and valU.\nThe last element of ptrU is equal to nnzU.\n@param[in]\nindU        pointer to rocblas_int. Array on the GPU of dimension nnzU.\\n\nIt contains the column indices of the non-zero elements of U. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalU        pointer to type. Array on the GPU of dimension nnzU.\\n\nThe values of the non-zero elements of U.\n@param[out]\nptrT        pointer to rocblas_int. 
Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[out]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\\n\nIt contains the column indices of the non-zero elements of T. Indices are\nsorted by row and by column within each row.\n@param[out]\nvalT        pointer to type. Array on the GPU of dimension nnzT.\\n\nThe values of the non-zero elements of T."]
+    #[doc = " \\brief SET_RFINFO_MODE sets the mode of the structure rfinfo required by the re-factorization functions\n\\ref rocsolver_scsrrf_refactlu \"CSRRF_REFACTLU\" and \\ref rocsolver_scsrrf_refactchol \"CSRRF_REFACTCHOL\", and\nby the direct solver \\ref rocsolver_scsrrf_solve \"CSRRF_SOLVE\".\n\n\\details\n@param[in]\nrfinfo      #rocsolver_rfinfo.\nThe rfinfo struct to be set up.\n@param[in]\nmode        #rocsolver_rfinfo_mode.\nUse rocsolver_rfinfo_mode_cholesky when the Cholesky factorization is required."]
+    pub fn rocsolver_set_rfinfo_mode(
+        rfinfo: rocsolver_rfinfo,
+        mode: rocsolver_rfinfo_mode,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief GET_RFINFO_MODE gets the mode of the structure rfinfo required by the re-factorization functions\n\\ref rocsolver_scsrrf_refactlu \"CSRRF_REFACTLU\" and \\ref rocsolver_scsrrf_refactchol \"CSRRF_REFACTCHOL\", and\nby the direct solver \\ref rocsolver_scsrrf_solve \"CSRRF_SOLVE\".\n\n\\details\n@param[in]\nrfinfo      #rocsolver_rfinfo.\nThe referenced rfinfo struct.\n@param[out]\nmode        #rocsolver_rfinfo_mode.\nThe queried mode."]
+    pub fn rocsolver_get_rfinfo_mode(
+        rfinfo: rocsolver_rfinfo,
+        mode: *mut rocsolver_rfinfo_mode,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief CSRRF_SUMLU bundles the factors \\f$L\\f$ and \\f$U\\f$, associated with the LU factorization\nof a sparse matrix \\f$A\\f$, into a single sparse matrix \\f$T=(L-I)+U\\f$.\n\n\\details Factor \\f$L\\f$ is a sparse lower triangular matrix with unit diagonal elements, and\n\\f$U\\f$ is a sparse upper triangular matrix. The resulting sparse matrix \\f$T\\f$ combines both\nsparse factors without storing the unit diagonal; in other words, the number of non-zero\nelements of T, nnzT, is given by nnzT = nnzL - n + nnzU.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows (and columns) of matrix A.\n@param[in]\nnnzL        rocblas_int. nnzL >= n.\nThe number of non-zero elements in L.\n@param[in]\nptrL        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indL and valL.\nThe last element of ptrL is equal to nnzL.\n@param[in]\nindL        pointer to rocblas_int. Array on the GPU of dimension nnzL.\nIt contains the column indices of the non-zero elements of L. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalL        pointer to type. Array on the GPU of dimension nnzL.\nThe values of the non-zero elements of L.\n@param[in]\nnnzU        rocblas_int. nnzU >= 0.\nThe number of non-zero elements in U.\n@param[in]\nptrU        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indU and valU.\nThe last element of ptrU is equal to nnzU.\n@param[in]\nindU        pointer to rocblas_int. Array on the GPU of dimension nnzU.\nIt contains the column indices of the non-zero elements of U. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalU        pointer to type. Array on the GPU of dimension nnzU.\nThe values of the non-zero elements of U.\n@param[out]\nptrT        pointer to rocblas_int. 
Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[out]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\nIt contains the column indices of the non-zero elements of T. Indices are\nsorted by row and by column within each row.\n@param[out]\nvalT        pointer to type. Array on the GPU of dimension nnzT.\nThe values of the non-zero elements of T."]
     pub fn rocsolver_scsrrf_sumlu(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -11356,7 +11989,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief CSRRF_SPLITLU splits the factors \\f$L\\f$ and \\f$U\\f$, associated with the LU factorization\nof a sparse matrix \\f$A\\f$, from a bundled matrix \\f$T=(L-I)+U\\f$.\n\n\\details Factor \\f$L\\f$ is a sparse lower triangular matrix with unit diagonal elements, and\n\\f$U\\f$ is a sparse upper triangular matrix. Conceptually, on input, U is stored on the diagonal\nand upper part of \\f$T\\f$, while the non diagonal elements of \\f$L\\f$ are stored on the strictly\nlower part of \\f$T\\f$.\n\n@param[in]\nhandle      rocblas_handle.\\n\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows (and columns) of matrix A.\n@param[in]\nnnzT        rocblas_int. nnzT >= 0.\\n\nThe number of non-zero elements in T.\n@param[in]\nptrT        pointer to rocblas_int. Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[in]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\\n\nIt contains the column indices of the non-zero elements of T. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalT        pointer to type. Array on the GPU of dimension nnzT.\\n\nThe values of the non-zero elements of T.\n@param[out]\nptrL        pointer to rocblas_int. Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indL and valL.\nThe last element of ptrL is equal to nnzL.\n@param[out]\nindL        pointer to rocblas_int. Array on the GPU of dimension nnzL.\\n\nIt contains the column indices of the non-zero elements of L. Indices are\nsorted by row and by column within each row. (If nnzL is not known in advance,\nthe size of this array could be set to nnzT + n as an upper bound).\n@param[out]\nvalL        pointer to type. Array on the GPU of dimension nnzL.\\n\nThe values of the non-zero elements of L. 
(If nnzL is not known in advance,\nthe size of this array could be set to nnzT + n as an upper bound).\n@param[out]\nptrU        pointer to rocblas_int. Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indU and valU.\nThe last element of ptrU is equal to nnzU.\n@param[out]\nindU        pointer to rocblas_int. Array on the GPU of dimension nnzU.\\n\nIt contains the column indices of the non-zero elements of U. Indices are\nsorted by row and by column within each row. (If nnzU is not known in advance,\nthe size of this array could be set to nnzT as an upper bound).\n@param[out]\nvalU        pointer to type. Array on the GPU of dimension nnzU.\\n\nThe values of the non-zero elements of U. (If nnzU is not known in advance,\nthe size of this array could be set to nnzT as an upper bound)."]
+    #[doc = " @{\n\\brief CSRRF_SPLITLU splits the factors \\f$L\\f$ and \\f$U\\f$, associated with the LU factorization\nof a sparse matrix \\f$A\\f$, from a bundled matrix \\f$T=(L-I)+U\\f$.\n\n\\details Factor \\f$L\\f$ is a sparse lower triangular matrix with unit diagonal elements, and\n\\f$U\\f$ is a sparse upper triangular matrix. Conceptually, on input, U is stored on the diagonal\nand upper part of \\f$T\\f$, while the non diagonal elements of \\f$L\\f$ are stored on the strictly\nlower part of \\f$T\\f$.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows (and columns) of matrix A.\n@param[in]\nnnzT        rocblas_int. nnzT >= 0.\nThe number of non-zero elements in T.\n@param[in]\nptrT        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[in]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\nIt contains the column indices of the non-zero elements of T. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalT        pointer to type. Array on the GPU of dimension nnzT.\nThe values of the non-zero elements of T.\n@param[out]\nptrL        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indL and valL.\nThe last element of ptrL is equal to nnzL.\n@param[out]\nindL        pointer to rocblas_int. Array on the GPU of dimension nnzL.\nIt contains the column indices of the non-zero elements of L. Indices are\nsorted by row and by column within each row. (If nnzL is not known in advance,\nthe size of this array could be set to nnzT + n as an upper bound).\n@param[out]\nvalL        pointer to type. Array on the GPU of dimension nnzL.\nThe values of the non-zero elements of L. 
(If nnzL is not known in advance,\nthe size of this array could be set to nnzT + n as an upper bound).\n@param[out]\nptrU        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indU and valU.\nThe last element of ptrU is equal to nnzU.\n@param[out]\nindU        pointer to rocblas_int. Array on the GPU of dimension nnzU.\nIt contains the column indices of the non-zero elements of U. Indices are\nsorted by row and by column within each row. (If nnzU is not known in advance,\nthe size of this array could be set to nnzT as an upper bound).\n@param[out]\nvalU        pointer to type. Array on the GPU of dimension nnzU.\nThe values of the non-zero elements of U. (If nnzU is not known in advance,\nthe size of this array could be set to nnzT as an upper bound)."]
     pub fn rocsolver_scsrrf_splitlu(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -11391,7 +12024,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief CSRRF_ANALYSIS performs the analysis phase required by the re-factorization functions\n\\ref rocsolver_scsrrf_refactlu \"CSRRF_REFACTLU\" and \\ref rocsolver_scsrrf_solve \"CSRRF_SOLVE\"\n\n\\details Consider a sparse matrix \\f$M\\f$ previously factorized as\n\n\\f[\nPMQ = L_MU_M\n\\f]\n\nwhere \\f$L_M\\f$ is lower triangular with unit diagonal, \\f$U_M\\f$ is upper triangular, and \\f$P\\f$\nand \\f$Q\\f$ are permutation matrices associated with pivoting and re-ordering (to minimize\nfill-in), respectively. The meta data generated by this routine is collected in the output parameter\nrfinfo. This information will allow the fast LU re-factorization of another sparse matrix \\f$A\\f$ as\n\n\\f[\nPAQ = L_AU_A\n\\f]\n\nand, eventually, the computation of the solution vector \\f$X\\f$ of any linear system of the form\n\n\\f[\nAX = B\n\\f]\n\nas long as \\f$A\\f$ has the same sparsity pattern as the previous matrix \\f$M\\f$.\n\nThis function supposes that the LU factors \\f$L_M\\f$ and \\f$U_M\\f$ are passed in a bundle\nmatrix \\f$T=(L_M-I)+U_M\\f$ as returned by \\ref rocsolver_scsrrf_sumlu \"CSRRF_SUMLU\",\nand that rfinfo has been initialized by \\ref rocsolver_create_rfinfo \"RFINFO_CREATE\".\n\n\\note\nIf only a re-factorization will be executed (i.e. no solver phase), then nrhs can be set to zero\nand B can be null.\n\n@param[in]\nhandle      rocblas_handle.\\n\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows (and columns) of matrix M.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right-hand-sides (columns of matrix B). Set nrhs to zero when only the\nre-factorization is needed.\n@param[in]\nnnzM        rocblas_int. nnzM >= 0.\\n\nThe number of non-zero elements in M.\n@param[in]\nptrM        pointer to rocblas_int. 
Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indM and valM.\nThe last element of ptrM is equal to nnzM.\n@param[in]\nindM        pointer to rocblas_int. Array on the GPU of dimension nnzM.\\n\nIt contains the column indices of the non-zero elements of M. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalM        pointer to type. Array on the GPU of dimension nnzM.\\n\nThe values of the non-zero elements of M.\n@param[in]\nnnzT        rocblas_int. nnzT >= 0.\\n\nThe number of non-zero elements in T.\n@param[in]\nptrT        pointer to rocblas_int. Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[in]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\\n\nIt contains the column indices of the non-zero elements of T. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalT        pointer to type. Array on the GPU of dimension nnzT.\\n\nThe values of the non-zero elements of T.\n@param[in]\npivP        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nContains the pivot indices representing the permutation matrix P, i.e. the\norder in which the rows of matrix M were re-arranged.\n@param[in]\npivQ        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nContains the pivot indices representing the permutation matrix Q, i.e. the\norder in which the columns of matrix M were re-arranged.\n@param[in]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\\n\nThe right hand side matrix B. It can be null if only the re-factorization is needed.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of B.\n@param[out]\nrfinfo      rocsolver_rfinfo.\\n\nStructure that holds the meta data generated in the analysis phase."]
+    #[doc = " @{\n\\brief CSRRF_ANALYSIS performs the analysis phase required by the re-factorization functions\n\\ref rocsolver_scsrrf_refactlu \"CSRRF_REFACTLU\" and \\ref rocsolver_scsrrf_refactchol \"CSRRF_REFACTCHOL\", and\nby the direct solver \\ref rocsolver_scsrrf_solve \"CSRRF_SOLVE\".\n\n\\details Consider a sparse matrix \\f$M\\f$ previously factorized as\n\n\\f[\nQ^TMQ = L_ML_M^T\n\\f]\n\n(Cholesky factorization for the symmetric positive definite case), or\n\n\\f[\nPMQ = L_MU_M\n\\f]\n\n(LU factorization for the general case)\n\nwhere \\f$L_M\\f$ is lower triangular (with unit diagonal in the general case), \\f$U_M\\f$ is upper triangular, and \\f$P\\f$\nand \\f$Q\\f$ are permutation matrices associated with pivoting and re-ordering (to minimize\nfill-in), respectively. The meta data generated by this routine is collected in the output parameter\nrfinfo. This information will allow the fast re-factorization of another sparse matrix \\f$A\\f$ as\n\n\\f[\nQ^TAQ = L_AL_A^T, \\quad \\text{or}\n\\f]\n\n\\f[\nPAQ = L_AU_A,\n\\f]\n\nand, eventually, the computation of the solution vector \\f$X\\f$ of any linear system of the form\n\n\\f[\nAX = B\n\\f]\n\nas long as \\f$A\\f$ has the same sparsity pattern as the previous matrix \\f$M\\f$.\n\nThis function supposes that the rfinfo struct has been initialized by \\ref rocsolver_create_rfinfo \"CREATE_RFINFO\".\nBy default, rfinfo is set up to work with the LU factorization (general matrices). If the matrix is symmetric positive definite,\nand the Cholesky factorization is\ndesired, then the corresponding mode must be manually set up by \\ref rocsolver_set_rfinfo_mode \"SET_RFINFO_MODE\". This function\ndoes not automatically detect symmetry.\n\nFor the LU factorization mode, the LU factors \\f$L_M\\f$ and \\f$U_M\\f$ must be passed in a bundle\nmatrix \\f$T=(L_M-I)+U_M\\f$ as returned by \\ref rocsolver_scsrrf_sumlu \"CSRRF_SUMLU\". 
For the Cholesky mode,\nthe lower triangular part of \\f$T\\f$ must contain the Cholesky factor \\f$L_M\\f$; the strictly upper triangular\npart of \\f$T\\f$ will be ignored. Similarly, the strictly upper triangular part of \\f$M\\f$ is ignored when working\nin Cholesky mode.\n\n\\note\nIf only a re-factorization will be executed (i.e. no solver phase), then nrhs can be set to zero\nand B can be null.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows (and columns) of matrix M.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right-hand-sides (columns of matrix B). Set nrhs to zero when only the\nre-factorization is needed.\n@param[in]\nnnzM        rocblas_int. nnzM >= 0.\nThe number of non-zero elements in M.\n@param[in]\nptrM        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indM and valM.\nThe last element of ptrM is equal to nnzM.\n@param[in]\nindM        pointer to rocblas_int. Array on the GPU of dimension nnzM.\nIt contains the column indices of the non-zero elements of M. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalM        pointer to type. Array on the GPU of dimension nnzM.\nThe values of the non-zero elements of M. The strictly upper triangular entries are\nnot referenced when working in Cholesky mode.\n@param[in]\nnnzT        rocblas_int. nnzT >= 0.\nThe number of non-zero elements in T.\n@param[in]\nptrT        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[in]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\nIt contains the column indices of the non-zero elements of T. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalT        pointer to type. 
Array on the GPU of dimension nnzT.\nThe values of the non-zero elements of T. The strictly upper triangular entries are\nnot referenced when working in Cholesky mode.\n@param[in]\npivP        pointer to rocblas_int. Array on the GPU of dimension n.\nContains the pivot indices representing the permutation matrix P, i.e. the\norder in which the rows of matrix M were re-arranged. When working in Cholesky mode,\nthis array is not referenced and can be null.\n@param[in]\npivQ        pointer to rocblas_int. Array on the GPU of dimension n.\nContains the pivot indices representing the permutation matrix Q, i.e. the\norder in which the columns of matrix M were re-arranged.\n@param[in]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\nThe right hand side matrix B. It can be null if only the re-factorization is needed.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of B.\n@param[out]\nrfinfo      rocsolver_rfinfo.\nStructure that holds the meta data generated in the analysis phase."]
     pub fn rocsolver_scsrrf_analysis(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -11434,7 +12067,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief CSRRF_REFACTLU performs a fast LU factorization of a sparse matrix \\f$A\\f$ based on the\ninformation from the factorization of a previous matrix \\f$M\\f$ with the same sparsity pattern\n(re-factorization).\n\n\\details Consider a sparse matrix \\f$M\\f$ previously factorized as\n\n\\f[\nPMQ = L_MU_M\n\\f]\n\nwhere \\f$L_M\\f$ is lower triangular with unit diagonal, \\f$U_M\\f$ is upper triangular, and \\f$P\\f$\nand \\f$Q\\f$ are permutation matrices associated with pivoting and re-ordering (to minimize\nfill-in), respectively. If \\f$A\\f$ has the same sparsity pattern as \\f$M\\f$, then the re-factorization\n\n\\f[\nPAQ = L_AU_A\n\\f]\n\ncan be computed numerically without any symbolic or analysis phases.\n\nThis function supposes that rfinfo has been updated, by function \\ref rocsolver_scsrrf_analysis \"CSRRF_ANALYSIS\",\nafter the analysis phase of the previous matrix M and its initial factorization.\n\n@param[in]\nhandle      rocblas_handle.\\n\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows (and columns) of matrix A.\n@param[in]\nnnzA        rocblas_int. nnzA >= 0.\\n\nThe number of non-zero elements in A.\n@param[in]\nptrA        pointer to rocblas_int. Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indA and valA.\nThe last element of ptrM is equal to nnzA.\n@param[in]\nindA        pointer to rocblas_int. Array on the GPU of dimension nnzA.\\n\nIt contains the column indices of the non-zero elements of M. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalA        pointer to type. Array on the GPU of dimension nnzA.\\n\nThe values of the non-zero elements of A.\n@param[in]\nnnzT        rocblas_int. nnzT >= 0.\\n\nThe number of non-zero elements in T.\n@param[in]\nptrT        pointer to rocblas_int. 
Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[in]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\\n\nIt contains the column indices of the non-zero elements of T. Indices are\nsorted by row and by column within each row.\n@param[out]\nvalT        pointer to type. Array on the GPU of dimension nnzT.\\n\nThe values of the non-zero elements of the new bundle matrix (L_A - I) + U_A.\n@param[in]\npivP        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nContains the pivot indices representing the permutation matrix P, i.e. the\norder in which the rows of matrix M were re-arranged.\n@param[in]\npivQ        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nContains the pivot indices representing the permutation matrix Q, i.e. the\norder in which the columns of matrix M were re-arranged.\n@param[in]\nrfinfo      rocsolver_rfinfo.\\n\nStructure that holds the meta data generated in the analysis phase."]
+    #[doc = " @{\n\\brief CSRRF_REFACTLU performs a fast LU factorization of a sparse matrix \\f$A\\f$ based on the\ninformation from the factorization of a previous matrix \\f$M\\f$ with the same sparsity pattern\n(re-factorization).\n\n\\details Consider a sparse matrix \\f$M\\f$ previously factorized as\n\n\\f[\nPMQ = L_MU_M\n\\f]\n\nwhere \\f$L_M\\f$ is lower triangular with unit diagonal, \\f$U_M\\f$ is upper triangular, and \\f$P\\f$\nand \\f$Q\\f$ are permutation matrices associated with pivoting and re-ordering (to minimize\nfill-in), respectively. If \\f$A\\f$ has the same sparsity pattern as \\f$M\\f$, then the re-factorization\n\n\\f[\nPAQ = L_AU_A\n\\f]\n\ncan be computed numerically without a symbolic analysis phase.\n\nThis function supposes that rfinfo has been updated, by function \\ref rocsolver_scsrrf_analysis \"CSRRF_ANALYSIS\",\nafter the analysis phase of the previous matrix M and its initial factorization. Both functions, CSRRF_ANALYSIS and\nCSRRF_REFACTLU must be run with the same rfinfo mode (LU factorization, the default mode), otherwise the workflow will\nresult in an error.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows (and columns) of matrix A.\n@param[in]\nnnzA        rocblas_int. nnzA >= 0.\nThe number of non-zero elements in A.\n@param[in]\nptrA        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indA and valA.\nThe last element of ptrA is equal to nnzA.\n@param[in]\nindA        pointer to rocblas_int. Array on the GPU of dimension nnzA.\nIt contains the column indices of the non-zero elements of A. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalA        pointer to type. Array on the GPU of dimension nnzA.\nThe values of the non-zero elements of A.\n@param[in]\nnnzT        rocblas_int. 
nnzT >= 0.\nThe number of non-zero elements in T.\n@param[in]\nptrT        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[in]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\nIt contains the column indices of the non-zero elements of T. Indices are\nsorted by row and by column within each row.\n@param[out]\nvalT        pointer to type. Array on the GPU of dimension nnzT.\nThe values of the non-zero elements of the new bundle matrix (L_A - I) + U_A.\n@param[in]\npivP        pointer to rocblas_int. Array on the GPU of dimension n.\nContains the pivot indices representing the permutation matrix P, i.e. the\norder in which the rows of matrix M were re-arranged.\n@param[in]\npivQ        pointer to rocblas_int. Array on the GPU of dimension n.\nContains the pivot indices representing the permutation matrix Q, i.e. the\norder in which the columns of matrix M were re-arranged.\n@param[in]\nrfinfo      rocsolver_rfinfo.\nStructure that holds the meta data generated in the analysis phase."]
     pub fn rocsolver_scsrrf_refactlu(
         handle: rocblas_handle,
         n: rocblas_int,
@@ -11471,7 +12104,42 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " @{\n\\brief CSRRF_SOLVE solves a linear system with sparse coefficient matrix \\f$A\\f$ in its\nfactorized form.\n\n\\details The linear system is of the form\n\n\\f[\nAX = B\n\\f]\n\nwhere the sparse matrix \\f$A\\f$ is factorized as\n\n\\f[\nPAQ = L_AU_A\n\\f]\n\nand \\f$B\\f$ is a dense matrix of right hand sides.\n\nThis function supposes that the LU factors \\f$L_A\\f$ and \\f$U_A\\f$ are passed in a bundle\nmatrix \\f$T=(L_A-I)+U_A\\f$ as returned by \\ref rocsolver_scsrrf_refactlu \"CSRRF_REFACTLU\" or\n\\ref rocsolver_scsrrf_sumlu \"CSRRF_SUMLU\", and that rfinfo has been updated, by function\n\\ref rocsolver_scsrrf_analysis \"CSRRF_ANALYSIS\", after the analysis phase.\n\n@param[in]\nhandle      rocblas_handle.\\n\n@param[in]\nn           rocblas_int. n >= 0.\\n\nThe number of rows (and columns) of matrix A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\\n\nThe number of right hand sides, i.e. the number of columns of matrix B.\n@param[in]\nnnzT        rocblas_int. nnzT >= 0.\\n\nThe number of non-zero elements in T.\n@param[in]\nptrT        pointer to rocblas_int. Array on the GPU of dimension n+1.\\n\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[in]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\\n\nIt contains the column indices of the non-zero elements of T. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalT        pointer to type. Array on the GPU of dimension nnzT.\\n\nThe values of the non-zero elements of T.\n@param[in]\npivP        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nContains the pivot indices representing the permutation matrix P, i.e. the\norder in which the rows of matrix A were re-arranged.\n@param[in]\npivQ        pointer to rocblas_int. Array on the GPU of dimension n.\\n\nContains the pivot indices representing the permutation matrix Q, i.e. 
the\norder in which the columns of matrix A were re-arranged.\n@param[inout]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\\n\nOn entry the right hand side matrix B. On exit, the solution matrix X.\n@param[in]\nldb         rocblas_int. ldb >= n.\\n\nThe leading dimension of B.\n@param[in]\nrfinfo      rocsolver_rfinfo.\\n\nStructure that holds the meta data generated in the analysis phase."]
+    #[doc = " @{\n\\brief CSRRF_REFACTCHOL performs a fast Cholesky factorization of a sparse symmetric positive definite matrix \\f$A\\f$\nbased on the information from the factorization of a previous matrix \\f$M\\f$ with the same sparsity pattern\n(re-factorization).\n\n\\details Consider a sparse matrix \\f$M\\f$ previously factorized as\n\n\\f[\nQ^TMQ = L_ML_M^T\n\\f]\n\nwhere \\f$L_M\\f$ is lower triangular, and \\f$Q\\f$ is a permutation matrix associated with re-ordering to minimize\nfill-in. If \\f$A\\f$ has the same sparsity pattern as \\f$M\\f$, then the re-factorization\n\n\\f[\nQ^TAQ = L_AL_A^T\n\\f]\n\ncan be computed numerically without a symbolic analysis phase.\n\nThis function supposes that rfinfo has been updated by function \\ref rocsolver_scsrrf_analysis \"CSRRF_ANALYSIS\",\nafter the analysis phase of the previous matrix M and its initial factorization. Both functions, CSRRF_ANALYSIS and\nCSRRF_REFACTCHOL must be run with the same rfinfo mode (Cholesky factorization), otherwise the workflow will\nresult in an error.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows (and columns) of matrix A.\n@param[in]\nnnzA        rocblas_int. nnzA >= 0.\nThe number of non-zero elements in A.\n@param[in]\nptrA        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indA and valA.\nThe last element of ptrA is equal to nnzA.\n@param[in]\nindA        pointer to rocblas_int. Array on the GPU of dimension nnzA.\nIt contains the column indices of the non-zero elements of A. Indices are\nsorted by row and by column within each row.\n@param[in]\nvalA        pointer to type. Array on the GPU of dimension nnzA.\nThe values of the non-zero elements of A. The strictly upper triangular entries are\nnot referenced.\n@param[in]\nnnzT        rocblas_int. 
nnzT >= 0.\nThe number of non-zero elements in T.\n@param[in]\nptrT        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[in]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\nIt contains the column indices of the non-zero elements of T. Indices are\nsorted by row and by column within each row.\n@param[out]\nvalT        pointer to type. Array on the GPU of dimension nnzT.\nThe values of the non-zero elements of the new Cholesky factor L_A.\nThe strictly upper triangular entries of this array are not referenced.\n@param[in]\npivQ        pointer to rocblas_int. Array on the GPU of dimension n.\nContains the pivot indices representing the permutation matrix Q, i.e. the\norder in which the columns of matrix M were re-arranged.\n@param[in]\nrfinfo      #rocsolver_rfinfo.\nStructure that holds the meta data generated in the analysis phase."]
+    pub fn rocsolver_scsrrf_refactchol(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        nnzA: rocblas_int,
+        ptrA: *mut rocblas_int,
+        indA: *mut rocblas_int,
+        valA: *mut f32,
+        nnzT: rocblas_int,
+        ptrT: *mut rocblas_int,
+        indT: *mut rocblas_int,
+        valT: *mut f32,
+        pivQ: *mut rocblas_int,
+        rfinfo: rocsolver_rfinfo,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsolver_dcsrrf_refactchol(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        nnzA: rocblas_int,
+        ptrA: *mut rocblas_int,
+        indA: *mut rocblas_int,
+        valA: *mut f64,
+        nnzT: rocblas_int,
+        ptrT: *mut rocblas_int,
+        indT: *mut rocblas_int,
+        valT: *mut f64,
+        pivQ: *mut rocblas_int,
+        rfinfo: rocsolver_rfinfo,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief CSRRF_SOLVE solves a linear system with sparse coefficient matrix \\f$A\\f$ in its\nfactorized form.\n\n\\details The linear system is of the form\n\n\\f[\nAX = B\n\\f]\n\nwhere the sparse matrix \\f$A\\f$ is factorized as\n\n\\f[\nQ^TAQ = L_AL_A^T\n\\f]\n\n(Cholesky factorization for the symmetric positive definite case), or\n\n\\f[\nPAQ = L_AU_A\n\\f]\n\n(LU factorization for the general case),\n\nand \\f$B\\f$ is a dense matrix of right hand sides.\n\nThis function supposes that rfinfo has been updated by function \\ref rocsolver_scsrrf_analysis \"CSRRF_ANALYSIS\",\nafter the analysis phase. Both functions, CSRRF_ANALYSIS and\nCSRRF_SOLVE must be run with the same rfinfo mode (LU or Cholesky factorization), otherwise the workflow will\nresult in an error.\n\nFor the LU factorization mode, the LU factors \\f$L_A\\f$ and \\f$U_A\\f$ must be passed in a bundle matrix \\f$T=(L_A-I)+U_A\\f$\nas returned by \\ref rocsolver_scsrrf_refactlu \"CSRRF_REFACTLU\" or \\ref rocsolver_scsrrf_sumlu \"CSRRF_SUMLU\". For the Cholesky mode,\nthe lower triangular part of \\f$T\\f$ must contain the Cholesky factor \\f$L_A\\f$; the strictly upper triangular\npart of \\f$T\\f$ will be ignored.\n\n@param[in]\nhandle      rocblas_handle.\n@param[in]\nn           rocblas_int. n >= 0.\nThe number of rows (and columns) of matrix A.\n@param[in]\nnrhs        rocblas_int. nrhs >= 0.\nThe number of right hand sides, i.e. the number of columns of matrix B.\n@param[in]\nnnzT        rocblas_int. nnzT >= 0.\nThe number of non-zero elements in T.\n@param[in]\nptrT        pointer to rocblas_int. Array on the GPU of dimension n+1.\nIt contains the positions of the beginning of each row in indT and valT.\nThe last element of ptrT is equal to nnzT.\n@param[in]\nindT        pointer to rocblas_int. Array on the GPU of dimension nnzT.\nIt contains the column indices of the non-zero elements of T. 
Indices are\nsorted by row and by column within each row.\n@param[in]\nvalT        pointer to type. Array on the GPU of dimension nnzT.\nThe values of the non-zero elements of T. The strictly upper triangular entries are\nnot referenced when working in Cholesky mode.\n@param[in]\npivP        pointer to rocblas_int. Array on the GPU of dimension n.\nContains the pivot indices representing the permutation matrix P, i.e. the\norder in which the rows of matrix A were re-arranged. When working in Cholesky mode,\nthis array is not referenced and can be null.\n@param[in]\npivQ        pointer to rocblas_int. Array on the GPU of dimension n.\nContains the pivot indices representing the permutation matrix Q, i.e. the\norder in which the columns of matrix A were re-arranged.\n@param[inout]\nB           pointer to type. Array on the GPU of dimension ldb*nrhs.\nOn entry the right hand side matrix B. On exit, the solution matrix X.\n@param[in]\nldb         rocblas_int. ldb >= n.\nThe leading dimension of B.\n@param[in]\nrfinfo      rocsolver_rfinfo.\nStructure that holds the meta data generated in the analysis phase."]
     pub fn rocsolver_scsrrf_solve(
         handle: rocblas_handle,
         n: rocblas_int,
diff --git a/rocsparse-sys/README b/rocsparse-sys/README
index 55e8f2a..2a478e1 100644
--- a/rocsparse-sys/README
+++ b/rocsparse-sys/README
@@ -1 +1 @@
-bindgen /opt/rocm/include/rocsparse/rocsparse.h -o src/rocsparse.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "rocsparse_.*" --allowlist-var "ROCSPARSE_*" --must-use-type rocsparse_status -- -I/opt/rocm/include
\ No newline at end of file
+bindgen $Env:HIP_PATH/include/rocsparse/rocsparse.h -o src/rocsparse.rs --no-layout-tests --default-enum-style=newtype --no-derive-debug --allowlist-function "rocsparse_.*" --allowlist-var "ROCSPARSE_*" --must-use-type rocsparse_status -- -I"$Env:HIP_PATH/include"
\ No newline at end of file
diff --git a/rocsparse-sys/src/rocsparse.rs b/rocsparse-sys/src/rocsparse.rs
index 58ac24b..578f99d 100644
--- a/rocsparse-sys/src/rocsparse.rs
+++ b/rocsparse-sys/src/rocsparse.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.66.1 */
+/* automatically generated by rust-bindgen 0.69.4 */
 
 #[repr(C)]
 #[derive(Copy, Clone)]
@@ -55,6 +55,8 @@ pub struct _rocsparse_spvec_descr {
 }
 #[doc = " \\ingroup types_module\n  \\brief Generic API descriptor of the sparse vector.\n\n  \\details\n  The rocSPARSE sparse vector descriptor is a structure holding all properties of a sparse vector.\n  It must be initialized using rocsparse_create_spvec_descr() and the returned\n  descriptor must be passed to all subsequent generic API library calls that involve the sparse vector.\n  It should be destroyed at the end using rocsparse_destroy_spvec_descr()."]
 pub type rocsparse_spvec_descr = *mut _rocsparse_spvec_descr;
+#[doc = " \\ingroup types_module\n  \\brief Generic API descriptor of the sparse vector.\n\n  \\details\n  The rocSPARSE constant sparse vector descriptor is a structure holding all properties of a sparse vector.\n  It must be initialized using rocsparse_create_const_spvec_descr() and the returned\n  descriptor must be passed to all subsequent generic API library calls that involve the sparse vector.\n  It should be destroyed at the end using rocsparse_destroy_spvec_descr()."]
+pub type rocsparse_const_spvec_descr = *const _rocsparse_spvec_descr;
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct _rocsparse_spmat_descr {
@@ -62,6 +64,8 @@ pub struct _rocsparse_spmat_descr {
 }
 #[doc = " \\ingroup types_module\n  \\brief Generic API descriptor of the sparse matrix.\n\n  \\details\n  The rocSPARSE sparse matrix descriptor is a structure holding all properties of a sparse matrix.\n  It must be initialized using rocsparse_create_coo_descr(), rocsparse_create_coo_aos_descr(),\n  rocsparse_create_bsr_descr(), rocsparse_create_csr_descr(), rocsparse_create_csc_descr(),\n  rocsparse_create_ell_descr(), or rocsparse_create_bell_descr() and the returned\n  descriptor must be passed to all subsequent generic API library calls that involve the sparse matrix.\n  It should be destroyed at the end using rocsparse_destroy_spmat_descr()."]
 pub type rocsparse_spmat_descr = *mut _rocsparse_spmat_descr;
+#[doc = " \\ingroup types_module\n  \\brief Generic API descriptor of the sparse matrix.\n\n  \\details\n  The rocSPARSE constant sparse matrix descriptor is a structure holding all properties of a sparse matrix.\n  It must be initialized using rocsparse_create_const_coo_descr(), rocsparse_create_const_bsr_descr(),\n  rocsparse_create_const_csr_descr(), rocsparse_create_const_csc_descr(),\n  or rocsparse_create_const_bell_descr() and the returned\n  descriptor must be passed to all subsequent generic API library calls that involve the sparse matrix.\n  It should be destroyed at the end using rocsparse_destroy_spmat_descr()."]
+pub type rocsparse_const_spmat_descr = *const _rocsparse_spmat_descr;
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct _rocsparse_dnvec_descr {
@@ -69,6 +73,8 @@ pub struct _rocsparse_dnvec_descr {
 }
 #[doc = " \\ingroup types_module\n  \\brief Generic API descriptor of the dense vector.\n\n  \\details\n  The rocSPARSE dense vector descriptor is a structure holding all properties of a dense vector.\n  It must be initialized using rocsparse_create_dnvec_descr() and the returned\n  descriptor must be passed to all subsequent generic API library calls that involve the dense vector.\n  It should be destroyed at the end using rocsparse_destroy_dnvec_descr()."]
 pub type rocsparse_dnvec_descr = *mut _rocsparse_dnvec_descr;
+#[doc = " \\ingroup types_module\n  \\brief Generic API descriptor of the dense vector.\n\n  \\details\n  The rocSPARSE constant dense vector descriptor is a structure holding all properties of a dense vector.\n  It must be initialized using rocsparse_create_const_dnvec_descr() and the returned\n  descriptor must be passed to all subsequent generic API library calls that involve the dense vector.\n  It should be destroyed at the end using rocsparse_destroy_dnvec_descr()."]
+pub type rocsparse_const_dnvec_descr = *const _rocsparse_dnvec_descr;
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct _rocsparse_dnmat_descr {
@@ -76,6 +82,8 @@ pub struct _rocsparse_dnmat_descr {
 }
 #[doc = " \\ingroup types_module\n  \\brief Generic API descriptor of the dense matrix.\n\n  \\details\n  The rocSPARSE dense matrix descriptor is a structure holding all properties of a dense matrix.\n  It must be initialized using rocsparse_create_dnmat_descr() and the returned\n  descriptor must be passed to all subsequent generic API library calls that involve the dense matrix.\n  It should be destroyed at the end using rocsparse_destroy_dnmat_descr()."]
 pub type rocsparse_dnmat_descr = *mut _rocsparse_dnmat_descr;
+#[doc = " \\ingroup types_module\n  \\brief Generic API descriptor of the dense matrix.\n\n  \\details\n  The rocSPARSE constant dense matrix descriptor is a structure holding all properties of a dense matrix.\n  It must be initialized using rocsparse_create_const_dnmat_descr() and the returned\n  descriptor must be passed to all subsequent generic API library calls that involve the dense matrix.\n  It should be destroyed at the end using rocsparse_destroy_dnmat_descr()."]
+pub type rocsparse_const_dnmat_descr = *const _rocsparse_dnmat_descr;
 #[repr(C)]
 #[derive(Copy, Clone)]
 pub struct _rocsparse_color_info {
@@ -99,7 +107,7 @@ impl rocsparse_operation_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief Specify whether the matrix is to be transposed or not.\n\n  \\details\n  The \\ref rocsparse_operation indicates the operation performed with the given matrix."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_operation_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_operation_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief Specify whether the matrix is to be transposed or not.\n\n  \\details\n  The \\ref rocsparse_operation indicates the operation performed with the given matrix."]
 pub use self::rocsparse_operation_ as rocsparse_operation;
 impl rocsparse_index_base_ {
@@ -113,7 +121,7 @@ impl rocsparse_index_base_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief Specify the matrix index base.\n\n  \\details\n  The \\ref rocsparse_index_base indicates the index base of the indices. For a\n  given \\ref rocsparse_mat_descr, the \\ref rocsparse_index_base can be set using\n  rocsparse_set_mat_index_base(). The current \\ref rocsparse_index_base of a matrix\n  can be obtained by rocsparse_get_mat_index_base()."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_index_base_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_index_base_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief Specify the matrix index base.\n\n  \\details\n  The \\ref rocsparse_index_base indicates the index base of the indices. For a\n  given \\ref rocsparse_mat_descr, the \\ref rocsparse_index_base can be set using\n  rocsparse_set_mat_index_base(). The current \\ref rocsparse_index_base of a matrix\n  can be obtained by rocsparse_get_mat_index_base()."]
 pub use self::rocsparse_index_base_ as rocsparse_index_base;
 impl rocsparse_matrix_type_ {
@@ -135,7 +143,7 @@ impl rocsparse_matrix_type_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief Specify the matrix type.\n\n  \\details\n  The \\ref rocsparse_matrix_type indices the type of a matrix. For a given\n  \\ref rocsparse_mat_descr, the \\ref rocsparse_matrix_type can be set using\n  rocsparse_set_mat_type(). The current \\ref rocsparse_matrix_type of a matrix can be\n  obtained by rocsparse_get_mat_type()."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_matrix_type_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_matrix_type_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief Specify the matrix type.\n\n  \\details\n  The \\ref rocsparse_matrix_type indices the type of a matrix. For a given\n  \\ref rocsparse_mat_descr, the \\ref rocsparse_matrix_type can be set using\n  rocsparse_set_mat_type(). The current \\ref rocsparse_matrix_type of a matrix can be\n  obtained by rocsparse_get_mat_type()."]
 pub use self::rocsparse_matrix_type_ as rocsparse_matrix_type;
 impl rocsparse_diag_type_ {
@@ -149,7 +157,7 @@ impl rocsparse_diag_type_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief Indicates if the diagonal entries are unity.\n\n  \\details\n  The \\ref rocsparse_diag_type indicates whether the diagonal entries of a matrix are\n  unity or not. If \\ref rocsparse_diag_type_unit is specified, all present diagonal\n  values will be ignored. For a given \\ref rocsparse_mat_descr, the\n  \\ref rocsparse_diag_type can be set using rocsparse_set_mat_diag_type(). The current\n  \\ref rocsparse_diag_type of a matrix can be obtained by\n  rocsparse_get_mat_diag_type()."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_diag_type_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_diag_type_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief Indicates if the diagonal entries are unity.\n\n  \\details\n  The \\ref rocsparse_diag_type indicates whether the diagonal entries of a matrix are\n  unity or not. If \\ref rocsparse_diag_type_unit is specified, all present diagonal\n  values will be ignored. For a given \\ref rocsparse_mat_descr, the\n  \\ref rocsparse_diag_type can be set using rocsparse_set_mat_diag_type(). The current\n  \\ref rocsparse_diag_type of a matrix can be obtained by\n  rocsparse_get_mat_diag_type()."]
 pub use self::rocsparse_diag_type_ as rocsparse_diag_type;
 impl rocsparse_fill_mode_ {
@@ -163,7 +171,7 @@ impl rocsparse_fill_mode_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief Specify the matrix fill mode.\n\n  \\details\n  The \\ref rocsparse_fill_mode indicates whether the lower or the upper part is stored\n  in a sparse triangular matrix. For a given \\ref rocsparse_mat_descr, the\n  \\ref rocsparse_fill_mode can be set using rocsparse_set_mat_fill_mode(). The current\n  \\ref rocsparse_fill_mode of a matrix can be obtained by\n  rocsparse_get_mat_fill_mode()."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_fill_mode_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_fill_mode_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief Specify the matrix fill mode.\n\n  \\details\n  The \\ref rocsparse_fill_mode indicates whether the lower or the upper part is stored\n  in a sparse triangular matrix. For a given \\ref rocsparse_mat_descr, the\n  \\ref rocsparse_fill_mode can be set using rocsparse_set_mat_fill_mode(). The current\n  \\ref rocsparse_fill_mode of a matrix can be obtained by\n  rocsparse_get_mat_fill_mode()."]
 pub use self::rocsparse_fill_mode_ as rocsparse_fill_mode;
 impl rocsparse_storage_mode_ {
@@ -175,10 +183,10 @@ impl rocsparse_storage_mode_ {
     pub const rocsparse_storage_mode_unsorted: rocsparse_storage_mode_ = rocsparse_storage_mode_(1);
 }
 #[repr(transparent)]
-#[doc = " \\ingroup types_module\n  \\brief Specify whether the matrix is stored sorted or not.\n\n  \\details\n  The \\ref rocsparse_storage_mode indicates whether the matrix is stored sorted or not.\n  For a given \\ref rocsparse_mat_descr, the \\ref rocsparse_storage_mode can be set\n  using rocsparse_set_storage_mode(). The current \\ref rocsparse_storage_mode of a\n  matrix can be obtained by rocsparse_get_mat_storage_mode()."]
+#[doc = " \\ingroup types_module\n  \\brief Specify whether the matrix is stored sorted or not.\n\n  \\details\n  The \\ref rocsparse_storage_mode indicates whether the matrix is stored sorted or not.\n  For a given \\ref rocsparse_mat_descr, the \\ref rocsparse_storage_mode can be set\n  using rocsparse_set_mat_storage_mode(). The current \\ref rocsparse_storage_mode of a\n  matrix can be obtained by rocsparse_get_mat_storage_mode()."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_storage_mode_(pub ::std::os::raw::c_uint);
-#[doc = " \\ingroup types_module\n  \\brief Specify whether the matrix is stored sorted or not.\n\n  \\details\n  The \\ref rocsparse_storage_mode indicates whether the matrix is stored sorted or not.\n  For a given \\ref rocsparse_mat_descr, the \\ref rocsparse_storage_mode can be set\n  using rocsparse_set_storage_mode(). The current \\ref rocsparse_storage_mode of a\n  matrix can be obtained by rocsparse_get_mat_storage_mode()."]
+pub struct rocsparse_storage_mode_(pub ::std::os::raw::c_int);
+#[doc = " \\ingroup types_module\n  \\brief Specify whether the matrix is stored sorted or not.\n\n  \\details\n  The \\ref rocsparse_storage_mode indicates whether the matrix is stored sorted or not.\n  For a given \\ref rocsparse_mat_descr, the \\ref rocsparse_storage_mode can be set\n  using rocsparse_set_mat_storage_mode(). The current \\ref rocsparse_storage_mode of a\n  matrix can be obtained by rocsparse_get_mat_storage_mode()."]
 pub use self::rocsparse_storage_mode_ as rocsparse_storage_mode;
 impl rocsparse_action_ {
     #[doc = "< Operate only on indices."]
@@ -191,7 +199,7 @@ impl rocsparse_action_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief Specify where the operation is performed on.\n\n  \\details\n  The \\ref rocsparse_action indicates whether the operation is performed on the full\n  matrix, or only on the sparsity pattern of the matrix."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_action_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_action_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief Specify where the operation is performed on.\n\n  \\details\n  The \\ref rocsparse_action indicates whether the operation is performed on the full\n  matrix, or only on the sparsity pattern of the matrix."]
 pub use self::rocsparse_action_ as rocsparse_action;
 impl rocsparse_direction_ {
@@ -205,7 +213,7 @@ impl rocsparse_direction_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief Specify the matrix direction.\n\n  \\details\n  The \\ref rocsparse_direction indicates whether a dense matrix should be parsed by\n  rows or by columns, assuming column-major storage."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_direction_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_direction_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief Specify the matrix direction.\n\n  \\details\n  The \\ref rocsparse_direction indicates whether a dense matrix should be parsed by\n  rows or by columns, assuming column-major storage."]
 pub use self::rocsparse_direction_ as rocsparse_direction;
 impl rocsparse_hyb_partition_ {
@@ -223,7 +231,7 @@ impl rocsparse_hyb_partition_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief HYB matrix partitioning type.\n\n  \\details\n  The \\ref rocsparse_hyb_partition type indicates how the hybrid format partitioning\n  between COO and ELL storage formats is performed."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_hyb_partition_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_hyb_partition_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief HYB matrix partitioning type.\n\n  \\details\n  The \\ref rocsparse_hyb_partition type indicates how the hybrid format partitioning\n  between COO and ELL storage formats is performed."]
 pub use self::rocsparse_hyb_partition_ as rocsparse_hyb_partition;
 impl rocsparse_analysis_policy_ {
@@ -239,7 +247,7 @@ impl rocsparse_analysis_policy_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief Specify policy in analysis functions.\n\n  \\details\n  The \\ref rocsparse_analysis_policy specifies whether gathered analysis data should be\n  re-used or not. If meta data from a previous e.g. rocsparse_csrilu0_analysis() call\n  is available, it can be re-used for subsequent calls to e.g.\n  rocsparse_csrsv_analysis() and greatly improve performance of the analysis function."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_analysis_policy_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_analysis_policy_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief Specify policy in analysis functions.\n\n  \\details\n  The \\ref rocsparse_analysis_policy specifies whether gathered analysis data should be\n  re-used or not. If meta data from a previous e.g. rocsparse_csrilu0_analysis() call\n  is available, it can be re-used for subsequent calls to e.g.\n  rocsparse_csrsv_analysis() and greatly improve performance of the analysis function."]
 pub use self::rocsparse_analysis_policy_ as rocsparse_analysis_policy;
 impl rocsparse_solve_policy_ {
@@ -249,7 +257,7 @@ impl rocsparse_solve_policy_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief Specify policy in triangular solvers and factorizations.\n\n  \\details\n  This is a placeholder."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_solve_policy_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_solve_policy_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief Specify policy in triangular solvers and factorizations.\n\n  \\details\n  This is a placeholder."]
 pub use self::rocsparse_solve_policy_ as rocsparse_solve_policy;
 impl rocsparse_pointer_mode_ {
@@ -263,7 +271,7 @@ impl rocsparse_pointer_mode_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief Indicates if the pointer is device pointer or host pointer.\n\n  \\details\n  The \\ref rocsparse_pointer_mode indicates whether scalar values are passed by\n  reference on the host or device. The \\ref rocsparse_pointer_mode can be changed by\n  rocsparse_set_pointer_mode(). The currently used pointer mode can be obtained by\n  rocsparse_get_pointer_mode()."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_pointer_mode_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_pointer_mode_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief Indicates if the pointer is device pointer or host pointer.\n\n  \\details\n  The \\ref rocsparse_pointer_mode indicates whether scalar values are passed by\n  reference on the host or device. The \\ref rocsparse_pointer_mode can be changed by\n  rocsparse_set_pointer_mode(). The currently used pointer mode can be obtained by\n  rocsparse_get_pointer_mode()."]
 pub use self::rocsparse_pointer_mode_ as rocsparse_pointer_mode;
 impl rocsparse_status_ {
@@ -318,10 +326,18 @@ impl rocsparse_status_ {
     #[doc = "< sorted storage required."]
     pub const rocsparse_status_requires_sorted_storage: rocsparse_status_ = rocsparse_status_(12);
 }
+impl rocsparse_status_ {
+    #[doc = "< exception being thrown."]
+    pub const rocsparse_status_thrown_exception: rocsparse_status_ = rocsparse_status_(13);
+}
+impl rocsparse_status_ {
+    #[doc = "< Nothing preventing function to proceed"]
+    pub const rocsparse_status_continue: rocsparse_status_ = rocsparse_status_(14);
+}
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of rocsparse status codes definition.\n\n  \\details\n  This is a list of the \\ref rocsparse_status types that are used by the rocSPARSE\n  library."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_status_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_status_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of rocsparse status codes definition.\n\n  \\details\n  This is a list of the \\ref rocsparse_status types that are used by the rocSPARSE\n  library."]
 pub use self::rocsparse_status_ as rocsparse_status;
 impl rocsparse_data_status_ {
@@ -364,7 +380,7 @@ impl rocsparse_data_status_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of rocsparse data status codes definition.\n\n  \\details\n  This is a list of the \\ref rocsparse_data_status types that are used by the rocSPARSE\n  library in the matrix check routines."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_data_status_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_data_status_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of rocsparse data status codes definition.\n\n  \\details\n  This is a list of the \\ref rocsparse_data_status types that are used by the rocSPARSE\n  library in the matrix check routines."]
 pub use self::rocsparse_data_status_ as rocsparse_data_status;
 impl rocsparse_indextype_ {
@@ -382,7 +398,7 @@ impl rocsparse_indextype_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of rocsparse index types.\n\n  \\details\n  Indicates the index width of a rocsparse index type."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_indextype_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_indextype_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of rocsparse index types.\n\n  \\details\n  Indicates the index width of a rocsparse index type."]
 pub use self::rocsparse_indextype_ as rocsparse_indextype;
 impl rocsparse_datatype_ {
@@ -420,7 +436,7 @@ impl rocsparse_datatype_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of rocsparse data types.\n\n  \\details\n  Indicates the precision width of data stored in a rocsparse type."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_datatype_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_datatype_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of rocsparse data types.\n\n  \\details\n  Indicates the precision width of data stored in a rocsparse type."]
 pub use self::rocsparse_datatype_ as rocsparse_datatype;
 impl rocsparse_format_ {
@@ -454,7 +470,7 @@ impl rocsparse_format_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of sparse matrix formats.\n\n  \\details\n  This is a list of supported \\ref rocsparse_format types that are used to describe a\n  sparse matrix."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_format_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_format_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of sparse matrix formats.\n\n  \\details\n  This is a list of supported \\ref rocsparse_format types that are used to describe a\n  sparse matrix."]
 pub use self::rocsparse_format_ as rocsparse_format;
 impl rocsparse_order_ {
@@ -468,7 +484,7 @@ impl rocsparse_order_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of dense matrix ordering.\n\n  \\details\n  This is a list of supported \\ref rocsparse_order types that are used to describe the\n  memory layout of a dense matrix"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_order_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_order_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of dense matrix ordering.\n\n  \\details\n  This is a list of supported \\ref rocsparse_order types that are used to describe the\n  memory layout of a dense matrix"]
 pub use self::rocsparse_order_ as rocsparse_order;
 impl rocsparse_spmat_attribute_ {
@@ -492,9 +508,35 @@ impl rocsparse_spmat_attribute_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of sparse matrix attributes"]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spmat_attribute_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spmat_attribute_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of sparse matrix attributes"]
 pub use self::rocsparse_spmat_attribute_ as rocsparse_spmat_attribute;
+impl rocsparse_sparse_to_sparse_alg_ {
+    pub const rocsparse_sparse_to_sparse_alg_default: rocsparse_sparse_to_sparse_alg_ =
+        rocsparse_sparse_to_sparse_alg_(0);
+}
+#[repr(transparent)]
+#[doc = " \\ingroup types_module\n  \\brief List of sparse to sparse algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_sparse_to_sparse_alg types that are used to perform\n  sparse to sparse conversion."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocsparse_sparse_to_sparse_alg_(pub ::std::os::raw::c_int);
+#[doc = " \\ingroup types_module\n  \\brief List of sparse to sparse algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_sparse_to_sparse_alg types that are used to perform\n  sparse to sparse conversion."]
+pub use self::rocsparse_sparse_to_sparse_alg_ as rocsparse_sparse_to_sparse_alg;
+impl rocsparse_sparse_to_sparse_stage_ {
+    #[doc = "< Data analysis."]
+    pub const rocsparse_sparse_to_sparse_stage_analysis: rocsparse_sparse_to_sparse_stage_ =
+        rocsparse_sparse_to_sparse_stage_(0);
+}
+impl rocsparse_sparse_to_sparse_stage_ {
+    #[doc = "< Performs the actual conversion."]
+    pub const rocsparse_sparse_to_sparse_stage_compute: rocsparse_sparse_to_sparse_stage_ =
+        rocsparse_sparse_to_sparse_stage_(1);
+}
+#[repr(transparent)]
+#[doc = " \\ingroup types_module\n  \\brief List of sparse_to_sparse stages.\n\n  \\details\n  This is a list of possible stages during sparse_to_sparse conversion. Typical order is\n  rocsparse_sparse_to_sparse_buffer_size, rocsparse_sparse_to_sparse_preprocess, rocsparse_sparse_to_sparse_compute."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocsparse_sparse_to_sparse_stage_(pub ::std::os::raw::c_int);
+#[doc = " \\ingroup types_module\n  \\brief List of sparse_to_sparse stages.\n\n  \\details\n  This is a list of possible stages during sparse_to_sparse conversion. Typical order is\n  rocsparse_sparse_to_sparse_buffer_size, rocsparse_sparse_to_sparse_preprocess, rocsparse_sparse_to_sparse_compute."]
+pub use self::rocsparse_sparse_to_sparse_stage_ as rocsparse_sparse_to_sparse_stage;
 impl rocsparse_itilu0_alg_ {
     #[doc = "< ASynchronous ITILU0 algorithm with in-place storage"]
     pub const rocsparse_itilu0_alg_default: rocsparse_itilu0_alg_ = rocsparse_itilu0_alg_(0);
@@ -515,7 +557,7 @@ impl rocsparse_itilu0_alg_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of Iterative ILU0 algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_itilu0_alg types that are used to perform\n  the iterative ILU0 algorithm."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_itilu0_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_itilu0_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of Iterative ILU0 algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_itilu0_alg types that are used to perform\n  the iterative ILU0 algorithm."]
 pub use self::rocsparse_itilu0_alg_ as rocsparse_itilu0_alg;
 impl rocsparse_gtsv_interleaved_alg_ {
@@ -537,13 +579,25 @@ impl rocsparse_gtsv_interleaved_alg_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of interleaved gtsv algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_gtsv_interleaved_alg types that are used to perform\n  interleaved tridiagonal solve."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_gtsv_interleaved_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_gtsv_interleaved_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of interleaved gtsv algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_gtsv_interleaved_alg types that are used to perform\n  interleaved tridiagonal solve."]
 pub use self::rocsparse_gtsv_interleaved_alg_ as rocsparse_gtsv_interleaved_alg;
-impl rocsparse_spmv_stage_ {
-    #[doc = "< Automatic stage detection."]
-    pub const rocsparse_spmv_stage_auto: rocsparse_spmv_stage_ = rocsparse_spmv_stage_(0);
+impl rocsparse_check_spmat_stage_ {
+    #[doc = "< Returns the required buffer size."]
+    pub const rocsparse_check_spmat_stage_buffer_size: rocsparse_check_spmat_stage_ =
+        rocsparse_check_spmat_stage_(0);
 }
+impl rocsparse_check_spmat_stage_ {
+    #[doc = "< Performs check."]
+    pub const rocsparse_check_spmat_stage_compute: rocsparse_check_spmat_stage_ =
+        rocsparse_check_spmat_stage_(1);
+}
+#[repr(transparent)]
+#[doc = " \\ingroup types_module\n  \\brief List of check_matrix stages.\n\n  \\details\n  This is a list of possible stages during check_matrix computation. Typical order is\n  rocsparse_check_spmat_stage_buffer_size, rocsparse_check_spmat_stage_compute."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocsparse_check_spmat_stage_(pub ::std::os::raw::c_int);
+#[doc = " \\ingroup types_module\n  \\brief List of check_matrix stages.\n\n  \\details\n  This is a list of possible stages during check_matrix computation. Typical order is\n  rocsparse_check_spmat_stage_buffer_size, rocsparse_check_spmat_stage_compute."]
+pub use self::rocsparse_check_spmat_stage_ as rocsparse_check_spmat_stage;
 impl rocsparse_spmv_stage_ {
     #[doc = "< Returns the required buffer size."]
     pub const rocsparse_spmv_stage_buffer_size: rocsparse_spmv_stage_ = rocsparse_spmv_stage_(1);
@@ -559,7 +613,7 @@ impl rocsparse_spmv_stage_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpMV stages.\n\n  \\details\n  This is a list of possible stages during SpMV computation. Typical order is\n  rocsparse_spmv_buffer_size, rocsparse_spmv_preprocess, rocsparse_spmv_compute."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spmv_stage_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spmv_stage_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpMV stages.\n\n  \\details\n  This is a list of possible stages during SpMV computation. Typical order is\n  rocsparse_spmv_buffer_size, rocsparse_spmv_preprocess, rocsparse_spmv_compute."]
 pub use self::rocsparse_spmv_stage_ as rocsparse_spmv_stage;
 impl rocsparse_spmv_alg_ {
@@ -590,10 +644,14 @@ impl rocsparse_spmv_alg_ {
     #[doc = "< BSR SpMV algorithm 1 for BSR matrices."]
     pub const rocsparse_spmv_alg_bsr: rocsparse_spmv_alg_ = rocsparse_spmv_alg_(6);
 }
+impl rocsparse_spmv_alg_ {
+    #[doc = "< CSR SpMV algorithm 3 (LRB) for CSR matrices."]
+    pub const rocsparse_spmv_alg_csr_lrb: rocsparse_spmv_alg_ = rocsparse_spmv_alg_(7);
+}
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpMV algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spmv_alg types that are used to perform\n  matrix vector product."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spmv_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spmv_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpMV algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spmv_alg types that are used to perform\n  matrix vector product."]
 pub use self::rocsparse_spmv_alg_ as rocsparse_spmv_alg;
 impl rocsparse_spsv_alg_ {
@@ -603,13 +661,9 @@ impl rocsparse_spsv_alg_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpSV algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spsv_alg types that are used to perform\n  triangular solve."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spsv_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spsv_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpSV algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spsv_alg types that are used to perform\n  triangular solve."]
 pub use self::rocsparse_spsv_alg_ as rocsparse_spsv_alg;
-impl rocsparse_spsv_stage_ {
-    #[doc = "< Automatic stage detection."]
-    pub const rocsparse_spsv_stage_auto: rocsparse_spsv_stage_ = rocsparse_spsv_stage_(0);
-}
 impl rocsparse_spsv_stage_ {
     #[doc = "< Returns the required buffer size."]
     pub const rocsparse_spsv_stage_buffer_size: rocsparse_spsv_stage_ = rocsparse_spsv_stage_(1);
@@ -625,7 +679,7 @@ impl rocsparse_spsv_stage_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpSV stages.\n\n  \\details\n  This is a list of possible stages during SpSV computation. Typical order is\n  rocsparse_spsv_buffer_size, rocsparse_spsv_preprocess, rocsparse_spsv_compute."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spsv_stage_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spsv_stage_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpSV stages.\n\n  \\details\n  This is a list of possible stages during SpSV computation. Typical order is\n  rocsparse_spsv_buffer_size, rocsparse_spsv_preprocess, rocsparse_spsv_compute."]
 pub use self::rocsparse_spsv_stage_ as rocsparse_spsv_stage;
 impl rocsparse_spitsv_alg_ {
@@ -635,13 +689,9 @@ impl rocsparse_spitsv_alg_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpITSV algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spitsv_alg types that are used to perform\n  triangular solve."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spitsv_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spitsv_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpITSV algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spitsv_alg types that are used to perform\n  triangular solve."]
 pub use self::rocsparse_spitsv_alg_ as rocsparse_spitsv_alg;
-impl rocsparse_spitsv_stage_ {
-    #[doc = "< Automatic stage detection."]
-    pub const rocsparse_spitsv_stage_auto: rocsparse_spitsv_stage_ = rocsparse_spitsv_stage_(0);
-}
 impl rocsparse_spitsv_stage_ {
     #[doc = "< Returns the required buffer size."]
     pub const rocsparse_spitsv_stage_buffer_size: rocsparse_spitsv_stage_ =
@@ -659,7 +709,7 @@ impl rocsparse_spitsv_stage_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpITSV stages.\n\n  \\details\n  This is a list of possible stages during SpITSV computation. Typical order is\n  buffer_size, preprocess, compute."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spitsv_stage_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spitsv_stage_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpITSV stages.\n\n  \\details\n  This is a list of possible stages during SpITSV computation. Typical order is\n  buffer_size, preprocess, compute."]
 pub use self::rocsparse_spitsv_stage_ as rocsparse_spitsv_stage;
 impl rocsparse_spsm_alg_ {
@@ -669,13 +719,9 @@ impl rocsparse_spsm_alg_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpSM algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spsm_alg types that are used to perform\n  triangular solve."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spsm_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spsm_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpSM algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spsm_alg types that are used to perform\n  triangular solve."]
 pub use self::rocsparse_spsm_alg_ as rocsparse_spsm_alg;
-impl rocsparse_spsm_stage_ {
-    #[doc = "< Automatic stage detection."]
-    pub const rocsparse_spsm_stage_auto: rocsparse_spsm_stage_ = rocsparse_spsm_stage_(0);
-}
 impl rocsparse_spsm_stage_ {
     #[doc = "< Returns the required buffer size."]
     pub const rocsparse_spsm_stage_buffer_size: rocsparse_spsm_stage_ = rocsparse_spsm_stage_(1);
@@ -691,7 +737,7 @@ impl rocsparse_spsm_stage_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpSM stages.\n\n  \\details\n  This is a list of possible stages during SpSM computation. Typical order is\n  rocsparse_spsm_buffer_size, rocsparse_spsm_preprocess, rocsparse_spsm_compute."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spsm_stage_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spsm_stage_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpSM stages.\n\n  \\details\n  This is a list of possible stages during SpSM computation. Typical order is\n  rocsparse_spsm_buffer_size, rocsparse_spsm_preprocess, rocsparse_spsm_compute."]
 pub use self::rocsparse_spsm_stage_ as rocsparse_spsm_stage;
 impl rocsparse_spmm_alg_ {
@@ -733,17 +779,21 @@ impl rocsparse_spmm_alg_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpMM algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spmm_alg types that are used to perform\n  matrix vector product."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spmm_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spmm_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpMM algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spmm_alg types that are used to perform\n  matrix vector product."]
 pub use self::rocsparse_spmm_alg_ as rocsparse_spmm_alg;
 impl rocsparse_sddmm_alg_ {
     #[doc = "< Default sddmm algorithm for the given format."]
     pub const rocsparse_sddmm_alg_default: rocsparse_sddmm_alg_ = rocsparse_sddmm_alg_(0);
 }
+impl rocsparse_sddmm_alg_ {
+    #[doc = "< Sddmm algorithm using dense blas operations."]
+    pub const rocsparse_sddmm_alg_dense: rocsparse_sddmm_alg_ = rocsparse_sddmm_alg_(1);
+}
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of sddmm algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_sddmm_alg types that are used to perform\n  matrix vector product."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_sddmm_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_sddmm_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of sddmm algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_sddmm_alg types that are used to perform\n  matrix vector product."]
 pub use self::rocsparse_sddmm_alg_ as rocsparse_sddmm_alg;
 impl rocsparse_sparse_to_dense_alg_ {
@@ -753,7 +803,7 @@ impl rocsparse_sparse_to_dense_alg_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of sparse to dense algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_sparse_to_dense_alg types that are used to perform\n  sparse to dense conversion."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_sparse_to_dense_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_sparse_to_dense_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of sparse to dense algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_sparse_to_dense_alg types that are used to perform\n  sparse to dense conversion."]
 pub use self::rocsparse_sparse_to_dense_alg_ as rocsparse_sparse_to_dense_alg;
 impl rocsparse_dense_to_sparse_alg_ {
@@ -763,13 +813,9 @@ impl rocsparse_dense_to_sparse_alg_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of dense to sparse algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_dense_to_sparse_alg types that are used to perform\n  dense to sparse conversion."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_dense_to_sparse_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_dense_to_sparse_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of dense to sparse algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_dense_to_sparse_alg types that are used to perform\n  dense to sparse conversion."]
 pub use self::rocsparse_dense_to_sparse_alg_ as rocsparse_dense_to_sparse_alg;
-impl rocsparse_spmm_stage_ {
-    #[doc = "< Automatic stage detection."]
-    pub const rocsparse_spmm_stage_auto: rocsparse_spmm_stage_ = rocsparse_spmm_stage_(0);
-}
 impl rocsparse_spmm_stage_ {
     #[doc = "< Returns the required buffer size."]
     pub const rocsparse_spmm_stage_buffer_size: rocsparse_spmm_stage_ = rocsparse_spmm_stage_(1);
@@ -785,13 +831,9 @@ impl rocsparse_spmm_stage_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpMM stages.\n\n  \\details\n  This is a list of possible stages during SpMM computation. Typical order is\n  rocsparse_spmm_buffer_size, rocsparse_spmm_preprocess, rocsparse_spmm_compute."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spmm_stage_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spmm_stage_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpMM stages.\n\n  \\details\n  This is a list of possible stages during SpMM computation. Typical order is\n  rocsparse_spmm_buffer_size, rocsparse_spmm_preprocess, rocsparse_spmm_compute."]
 pub use self::rocsparse_spmm_stage_ as rocsparse_spmm_stage;
-impl rocsparse_spgemm_stage_ {
-    #[doc = "< Automatic stage detection."]
-    pub const rocsparse_spgemm_stage_auto: rocsparse_spgemm_stage_ = rocsparse_spgemm_stage_(0);
-}
 impl rocsparse_spgemm_stage_ {
     #[doc = "< Returns the required buffer size."]
     pub const rocsparse_spgemm_stage_buffer_size: rocsparse_spgemm_stage_ =
@@ -816,7 +858,7 @@ impl rocsparse_spgemm_stage_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpGEMM stages.\n\n  \\details\n  This is a list of possible stages during SpGEMM computation. Typical order is\n  rocsparse_spgemm_buffer_size, rocsparse_spgemm_nnz, rocsparse_spgemm_compute."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spgemm_stage_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spgemm_stage_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpGEMM stages.\n\n  \\details\n  This is a list of possible stages during SpGEMM computation. Typical order is\n  rocsparse_spgemm_buffer_size, rocsparse_spgemm_nnz, rocsparse_spgemm_compute."]
 pub use self::rocsparse_spgemm_stage_ as rocsparse_spgemm_stage;
 impl rocsparse_spgemm_alg_ {
@@ -826,7 +868,7 @@ impl rocsparse_spgemm_alg_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of SpGEMM algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spgemm_alg types that are used to perform\n  sparse matrix sparse matrix product."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_spgemm_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_spgemm_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of SpGEMM algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_spgemm_alg types that are used to perform\n  sparse matrix sparse matrix product."]
 pub use self::rocsparse_spgemm_alg_ as rocsparse_spgemm_alg;
 impl rocsparse_gpsv_interleaved_alg_ {
@@ -842,7 +884,7 @@ impl rocsparse_gpsv_interleaved_alg_ {
 #[repr(transparent)]
 #[doc = " \\ingroup types_module\n  \\brief List of gpsv algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_gpsv_interleaved_alg types that are used to solve\n  pentadiagonal linear systems."]
 #[derive(Copy, Clone, Hash, PartialEq, Eq)]
-pub struct rocsparse_gpsv_interleaved_alg_(pub ::std::os::raw::c_uint);
+pub struct rocsparse_gpsv_interleaved_alg_(pub ::std::os::raw::c_int);
 #[doc = " \\ingroup types_module\n  \\brief List of gpsv algorithms.\n\n  \\details\n  This is a list of supported \\ref rocsparse_gpsv_interleaved_alg types that are used to solve\n  pentadiagonal linear systems."]
 pub use self::rocsparse_gpsv_interleaved_alg_ as rocsparse_gpsv_interleaved_alg;
 extern "C" {
@@ -855,6 +897,16 @@ extern "C" {
     #[doc = " \\ingroup aux_module\n  \\brief Destroy a rocsparse handle\n\n  \\details\n  \\p rocsparse_destroy_handle destroys the rocSPARSE library context and releases all\n  resources used by the rocSPARSE library.\n\n  @param[in]\n  handle  the handle to the rocSPARSE library context.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle \\p handle is invalid.\n  \\retval rocsparse_status_internal_error an internal error occurred."]
     pub fn rocsparse_destroy_handle(handle: rocsparse_handle) -> rocsparse_status;
 }
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Return the string representation of a rocSPARSE status code enum name\n\n  \\details\n  \\p rocsparse_get_status_name takes a rocSPARSE status as input and returns the string representation of this status.\n  If the status is not recognized, the function returns \"Unrecognized status code\"\n\n  @param[in]\n  status  a rocSPARSE status\n\n  \\retval pointer to null terminated string"]
+    pub fn rocsparse_get_status_name(status: rocsparse_status) -> *const ::std::os::raw::c_char;
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Return the rocSPARSE status code description as a string\n\n  \\details\n  \\p rocsparse_get_status_description takes a rocSPARSE status as input and returns the status description as a string.\n  If the status is not recognized, the function returns \"Unrecognized status code\"\n\n  @param[in]\n  status  a rocSPARSE status\n\n  \\retval pointer to null terminated string"]
+    pub fn rocsparse_get_status_description(
+        status: rocsparse_status,
+    ) -> *const ::std::os::raw::c_char;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Specify user defined HIP stream\n\n  \\details\n  \\p rocsparse_set_stream specifies the stream to be used by the rocSPARSE library\n  context and all subsequent function calls.\n\n  @param[inout]\n  handle  the handle to the rocSPARSE library context.\n  @param[in]\n  stream  the stream to be used by the rocSPARSE library context.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle \\p handle is invalid.\n\n  \\par Example\n  This example illustrates, how a user defined stream can be used in rocSPARSE.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create stream\n      hipStream_t stream;\n      hipStreamCreate(&stream);\n\n      // Set stream to rocSPARSE handle\n      rocsparse_set_stream(handle, stream);\n\n      // Do some work\n      // ...\n\n      // Clean up\n      rocsparse_destroy_handle(handle);\n      hipStreamDestroy(stream);\n  \\endcode"]
@@ -1034,7 +1086,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Create a sparse vector descriptor\n  \\details\n  \\p rocsparse_create_spvec_descr creates a sparse vector descriptor. It should be\n  destroyed at the end using rocsparse_destroy_mat_descr().\n\n  @param[out]\n  descr   the pointer to the sparse vector descriptor.\n  @param[in]\n  size   size of the sparse vector.\n  @param[in]\n  nnz   number of non-zeros in sparse vector.\n  @param[in]\n  indices   indices of the sparse vector where non-zeros occur (must be array of length \\p nnz ).\n  @param[in]\n  values   non-zero values in the sparse vector (must be array of length \\p nnz ).\n  @param[in]\n  idx_type   \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base   \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p indices or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p size or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Create a sparse vector descriptor\n  \\details\n  \\p rocsparse_create_spvec_descr creates a sparse vector descriptor. It should be\n  destroyed at the end using rocsparse_destroy_mat_descr().\n\n  @param[out]\n  descr   the pointer to the sparse vector descriptor.\n  @param[in]\n  size   size of the sparse vector.\n  @param[in]\n  nnz   number of non-zeros in sparse vector.\n  @param[in]\n  indices   indices of the sparse vector where non-zeros occur (must be array of length \\p nnz ).\n  @param[in]\n  values   non-zero values in the sparse vector (must be array of length \\p nnz ).\n  @param[in]\n  idx_type   \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base   \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p indices or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p size or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid.\n/\n/**@{"]
     pub fn rocsparse_create_spvec_descr(
         descr: *mut rocsparse_spvec_descr,
         size: i64,
@@ -1048,12 +1100,25 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Destroy a sparse vector descriptor\n\n  \\details\n  \\p rocsparse_destroy_spvec_descr destroys a sparse vector descriptor and releases all\n  resources used by the descriptor.\n\n  @param[in]\n  descr   the matrix descriptor.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr is invalid."]
-    pub fn rocsparse_destroy_spvec_descr(descr: rocsparse_spvec_descr) -> rocsparse_status;
+    pub fn rocsparse_create_const_spvec_descr(
+        descr: *mut rocsparse_const_spvec_descr,
+        size: i64,
+        nnz: i64,
+        indices: *const ::std::os::raw::c_void,
+        values: *const ::std::os::raw::c_void,
+        idx_type: rocsparse_indextype,
+        idx_base: rocsparse_index_base,
+        data_type: rocsparse_datatype,
+    ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse vector descriptor\n  \\details\n  \\p rocsparse_spvec_get gets the fields of the sparse vector descriptor\n\n  @param[in]\n  descr   the pointer to the sparse vector descriptor.\n  @param[out]\n  size   size of the sparse vector.\n  @param[out]\n  nnz   number of non-zeros in sparse vector.\n  @param[out]\n  indices   indices of the sparse vector where non-zeros occur (must be array of length \\p nnz ).\n  @param[out]\n  values   non-zero values in the sparse vector (must be array of length \\p nnz ).\n  @param[out]\n  idx_type   \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base   \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p indices or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p size or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Destroy a sparse vector descriptor\n\n  \\details\n  \\p rocsparse_destroy_spvec_descr destroys a sparse vector descriptor and releases all\n  resources used by the descriptor.\n\n  @param[in]\n  descr   the sparse vector descriptor.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr is invalid."]
+    pub fn rocsparse_destroy_spvec_descr(descr: rocsparse_const_spvec_descr) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse vector descriptor\n  \\details\n  \\p rocsparse_spvec_get gets the fields of the sparse vector descriptor\n\n  @param[in]\n  descr   the pointer to the sparse vector descriptor.\n  @param[out]\n  size   size of the sparse vector.\n  @param[out]\n  nnz   number of non-zeros in sparse vector.\n  @param[out]\n  indices   indices of the sparse vector where non-zeros occur (must be array of length \\p nnz ).\n  @param[out]\n  values   non-zero values in the sparse vector (must be array of length \\p nnz ).\n  @param[out]\n  idx_type   \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base   \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p indices or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p size or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
     pub fn rocsparse_spvec_get(
         descr: rocsparse_spvec_descr,
         size: *mut i64,
@@ -1065,22 +1130,42 @@ extern "C" {
         data_type: *mut rocsparse_datatype,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_const_spvec_get(
+        descr: rocsparse_const_spvec_descr,
+        size: *mut i64,
+        nnz: *mut i64,
+        indices: *mut *const ::std::os::raw::c_void,
+        values: *mut *const ::std::os::raw::c_void,
+        idx_type: *mut rocsparse_indextype,
+        idx_base: *mut rocsparse_index_base,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Get the index base stored in the sparse vector descriptor\n\n  @param[in]\n  descr   the pointer to the sparse vector descriptor.\n  @param[out]\n  idx_base   \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_base is invalid."]
     pub fn rocsparse_spvec_get_index_base(
-        descr: rocsparse_spvec_descr,
+        descr: rocsparse_const_spvec_descr,
         idx_base: *mut rocsparse_index_base,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the values array stored in the sparse vector descriptor\n\n  @param[in]\n  descr   the pointer to the sparse vector descriptor.\n  @param[out]\n  values   non-zero values in the sparse vector (must be array of length \\p nnz ).\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the values array stored in the sparse vector descriptor\n\n  @param[in]\n  descr   the pointer to the sparse vector descriptor.\n  @param[out]\n  values   non-zero values in the sparse vector (must be array of length \\p nnz ).\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid."]
     pub fn rocsparse_spvec_get_values(
         descr: rocsparse_spvec_descr,
         values: *mut *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_const_spvec_get_values(
+        descr: rocsparse_const_spvec_descr,
+        values: *mut *const ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Set the values array in the sparse vector descriptor\n\n  @param[inout]\n  descr   the pointer to the sparse vector descriptor.\n  @param[in]\n  values   non-zero values in the sparse vector (must be array of length \\p nnz ).\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid."]
@@ -1091,7 +1176,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Create a sparse COO matrix descriptor\n  \\details\n  \\p rocsparse_create_coo_descr creates a sparse COO matrix descriptor. It should be\n  destroyed at the end using \\p rocsparse_destroy_spmat_descr.\n\n  @param[out]\n  descr       the pointer to the sparse COO matrix descriptor.\n  @param[in]\n  rows        number of rows in the COO matrix.\n  @param[in]\n  cols        number of columns in the COO matrix\n  @param[in]\n  nnz         number of non-zeros in the COO matrix.\n  @param[in]\n  coo_row_ind row indices of the COO matrix (must be array of length \\p nnz ).\n  @param[in]\n  coo_col_ind column indices of the COO matrix (must be array of length \\p nnz ).\n  @param[in]\n  coo_val     values of the COO matrix (must be array of length \\p nnz ).\n  @param[in]\n  idx_type    \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p coo_row_ind or \\p coo_col_ind or \\p coo_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Create a sparse COO matrix descriptor\n  \\details\n  \\p rocsparse_create_coo_descr creates a sparse COO matrix descriptor. It should be\n  destroyed at the end using \\p rocsparse_destroy_spmat_descr.\n\n  @param[out]\n  descr       the pointer to the sparse COO matrix descriptor.\n  @param[in]\n  rows        number of rows in the COO matrix.\n  @param[in]\n  cols        number of columns in the COO matrix\n  @param[in]\n  nnz         number of non-zeros in the COO matrix.\n  @param[in]\n  coo_row_ind row indices of the COO matrix (must be array of length \\p nnz ).\n  @param[in]\n  coo_col_ind column indices of the COO matrix (must be array of length \\p nnz ).\n  @param[in]\n  coo_val     values of the COO matrix (must be array of length \\p nnz ).\n  @param[in]\n  idx_type    \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p coo_row_ind or \\p coo_col_ind or \\p coo_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
     pub fn rocsparse_create_coo_descr(
         descr: *mut rocsparse_spmat_descr,
         rows: i64,
@@ -1105,6 +1190,21 @@ extern "C" {
         data_type: rocsparse_datatype,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_create_const_coo_descr(
+        descr: *mut rocsparse_const_spmat_descr,
+        rows: i64,
+        cols: i64,
+        nnz: i64,
+        coo_row_ind: *const ::std::os::raw::c_void,
+        coo_col_ind: *const ::std::os::raw::c_void,
+        coo_val: *const ::std::os::raw::c_void,
+        idx_type: rocsparse_indextype,
+        idx_base: rocsparse_index_base,
+        data_type: rocsparse_datatype,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Create a sparse COO AoS matrix descriptor\n  \\details\n  \\p rocsparse_create_coo_aos_descr creates a sparse COO AoS matrix descriptor. It should be\n  destroyed at the end using \\p rocsparse_destroy_spmat_descr.\n\n  @param[out]\n  descr       the pointer to the sparse COO AoS matrix descriptor.\n  @param[in]\n  rows        number of rows in the COO AoS matrix.\n  @param[in]\n  cols        number of columns in the COO AoS matrix\n  @param[in]\n  nnz         number of non-zeros in the COO AoS matrix.\n  @param[in]\n  coo_ind     <row, column> indices of the COO AoS matrix (must be array of length \\p nnz ).\n  @param[in]\n  coo_val     values of the COO AoS matrix (must be array of length \\p nnz ).\n  @param[in]\n  idx_type    \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p coo_ind or \\p coo_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
@@ -1141,7 +1241,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Create a sparse CSR matrix descriptor\n  \\details\n  \\p rocsparse_create_csr_descr creates a sparse CSR matrix descriptor. It should be\n  destroyed at the end using \\p rocsparse_destroy_spmat_descr.\n\n  @param[out]\n  descr        the pointer to the sparse CSR matrix descriptor.\n  @param[in]\n  rows         number of rows in the CSR matrix.\n  @param[in]\n  cols         number of columns in the CSR matrix\n  @param[in]\n  nnz          number of non-zeros in the CSR matrix.\n  @param[in]\n  csr_row_ptr  row offsets of the CSR matrix (must be array of length \\p rows+1 ).\n  @param[in]\n  csr_col_ind  column indices of the CSR matrix (must be array of length \\p nnz ).\n  @param[in]\n  csr_val      values of the CSR matrix (must be array of length \\p nnz ).\n  @param[in]\n  row_ptr_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  col_ind_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base     \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type    \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n               \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p csr_row_ptr or \\p csr_col_ind or \\p csr_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p row_ptr_type or \\p col_ind_type or \\p idx_base or \\p data_type is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Create a sparse CSR matrix descriptor\n  \\details\n  \\p rocsparse_create_csr_descr creates a sparse CSR matrix descriptor. It should be\n  destroyed at the end using \\p rocsparse_destroy_spmat_descr.\n\n  @param[out]\n  descr        the pointer to the sparse CSR matrix descriptor.\n  @param[in]\n  rows         number of rows in the CSR matrix.\n  @param[in]\n  cols         number of columns in the CSR matrix\n  @param[in]\n  nnz          number of non-zeros in the CSR matrix.\n  @param[in]\n  csr_row_ptr  row offsets of the CSR matrix (must be array of length \\p rows+1 ).\n  @param[in]\n  csr_col_ind  column indices of the CSR matrix (must be array of length \\p nnz ).\n  @param[in]\n  csr_val      values of the CSR matrix (must be array of length \\p nnz ).\n  @param[in]\n  row_ptr_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  col_ind_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base     \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type    \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n               \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p csr_row_ptr or \\p csr_col_ind or \\p csr_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p row_ptr_type or \\p col_ind_type or \\p idx_base or \\p data_type is invalid."]
     pub fn rocsparse_create_csr_descr(
         descr: *mut rocsparse_spmat_descr,
         rows: i64,
@@ -1158,7 +1258,23 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Create a sparse CSC matrix descriptor\n  \\details\n  \\p rocsparse_create_csc_descr creates a sparse CSC matrix descriptor. It should be\n  destroyed at the end using \\p rocsparse_destroy_spmat_descr.\n\n  @param[out]\n  descr       the pointer to the sparse CSC matrix descriptor.\n  @param[in]\n  rows         number of rows in the CSC matrix.\n  @param[in]\n  cols         number of columns in the CSC matrix\n  @param[in]\n  nnz          number of non-zeros in the CSC matrix.\n  @param[in]\n  csc_col_ptr  column offsets of the CSC matrix (must be array of length \\p cols+1 ).\n  @param[in]\n  csc_row_ind  row indices of the CSC matrix (must be array of length \\p nnz ).\n  @param[in]\n  csc_val      values of the CSC matrix (must be array of length \\p nnz ).\n  @param[in]\n  col_ptr_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  row_ind_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base     \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type    \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n               \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p csc_col_ptr or \\p csc_row_ind or \\p csc_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p col_ptr_type or \\p row_ind_type or \\p idx_base or \\p data_type is invalid."]
+    pub fn rocsparse_create_const_csr_descr(
+        descr: *mut rocsparse_const_spmat_descr,
+        rows: i64,
+        cols: i64,
+        nnz: i64,
+        csr_row_ptr: *const ::std::os::raw::c_void,
+        csr_col_ind: *const ::std::os::raw::c_void,
+        csr_val: *const ::std::os::raw::c_void,
+        row_ptr_type: rocsparse_indextype,
+        col_ind_type: rocsparse_indextype,
+        idx_base: rocsparse_index_base,
+        data_type: rocsparse_datatype,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Create a sparse CSC matrix descriptor\n  \\details\n  \\p rocsparse_create_csc_descr creates a sparse CSC matrix descriptor. It should be\n  destroyed at the end using \\p rocsparse_destroy_spmat_descr.\n\n  @param[out]\n  descr       the pointer to the sparse CSC matrix descriptor.\n  @param[in]\n  rows         number of rows in the CSC matrix.\n  @param[in]\n  cols         number of columns in the CSC matrix\n  @param[in]\n  nnz          number of non-zeros in the CSC matrix.\n  @param[in]\n  csc_col_ptr  column offsets of the CSC matrix (must be array of length \\p cols+1 ).\n  @param[in]\n  csc_row_ind  row indices of the CSC matrix (must be array of length \\p nnz ).\n  @param[in]\n  csc_val      values of the CSC matrix (must be array of length \\p nnz ).\n  @param[in]\n  col_ptr_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  row_ind_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base     \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type    \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n               \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p csc_col_ptr or \\p csc_row_ind or \\p csc_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p col_ptr_type or \\p row_ind_type or \\p idx_base or \\p data_type is invalid."]
     pub fn rocsparse_create_csc_descr(
         descr: *mut rocsparse_spmat_descr,
         rows: i64,
@@ -1173,6 +1289,22 @@ extern "C" {
         data_type: rocsparse_datatype,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_create_const_csc_descr(
+        descr: *mut rocsparse_const_spmat_descr,
+        rows: i64,
+        cols: i64,
+        nnz: i64,
+        csc_col_ptr: *const ::std::os::raw::c_void,
+        csc_row_ind: *const ::std::os::raw::c_void,
+        csc_val: *const ::std::os::raw::c_void,
+        col_ptr_type: rocsparse_indextype,
+        row_ind_type: rocsparse_indextype,
+        idx_base: rocsparse_index_base,
+        data_type: rocsparse_datatype,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Create a sparse ELL matrix descriptor\n  \\details\n  \\p rocsparse_create_ell_descr creates a sparse ELL matrix descriptor. It should be\n  destroyed at the end using \\p rocsparse_destroy_spmat_descr.\n\n  @param[out]\n  descr       the pointer to the sparse ELL matrix descriptor.\n  @param[in]\n  rows        number of rows in the ELL matrix.\n  @param[in]\n  cols        number of columns in the ELL matrix\n  @param[in]\n  ell_col_ind column indices of the ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[in]\n  ell_val     values of the ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[in]\n  ell_width   width of the ELL matrix.\n  @param[in]\n  idx_type    \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p ell_col_ind or \\p ell_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p ell_width is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
@@ -1190,7 +1322,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Create a sparse blocked ELL matrix descriptor\n  \\details\n  \\p rocsparse_create_bell_descr creates a sparse blocked ELL matrix descriptor. It should be\n  destroyed at the end using \\p rocsparse_destroy_spmat_descr.\n\n  @param[out]\n  descr         the pointer to the sparse blocked ELL matrix descriptor.\n  @param[in]\n  rows          number of rows in the blocked ELL matrix.\n  @param[in]\n  cols          number of columns in the blocked ELL matrix\n  @param[in]\n  ell_block_dir \\ref rocsparse_direction_row or \\ref rocsparse_direction_column.\n  @param[in]\n  ell_block_dim block dimension of the sparse blocked ELL matrix.\n  @param[in]\n  ell_cols      column indices of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[in]\n  ell_col_ind   column indices of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[in]\n  ell_val       values of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[in]\n  idx_type      \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base      \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type     \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n                \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p ell_cols or \\p ell_col_ind or \\p ell_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Create a sparse blocked ELL matrix descriptor\n  \\details\n  \\p rocsparse_create_bell_descr creates a sparse blocked ELL matrix descriptor. It should be\n  destroyed at the end using \\p rocsparse_destroy_spmat_descr.\n\n  @param[out]\n  descr         the pointer to the sparse blocked ELL matrix descriptor.\n  @param[in]\n  rows          number of rows in the blocked ELL matrix.\n  @param[in]\n  cols          number of columns in the blocked ELL matrix\n  @param[in]\n  ell_block_dir \\ref rocsparse_direction_row or \\ref rocsparse_direction_column.\n  @param[in]\n  ell_block_dim block dimension of the sparse blocked ELL matrix.\n  @param[in]\n  ell_cols      column indices of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[in]\n  ell_col_ind   column indices of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[in]\n  ell_val       values of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[in]\n  idx_type      \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[in]\n  idx_base      \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  data_type     \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n                \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p ell_cols or \\p ell_col_ind or \\p ell_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
     pub fn rocsparse_create_bell_descr(
         descr: *mut rocsparse_spmat_descr,
         rows: i64,
@@ -1207,12 +1339,28 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Destroy a sparse matrix descriptor\n\n  \\details\n  \\p rocsparse_destroy_spmat_descr destroys a sparse matrix descriptor and releases all\n  resources used by the descriptor.\n\n  @param[in]\n  descr   the matrix descriptor.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr is invalid."]
-    pub fn rocsparse_destroy_spmat_descr(descr: rocsparse_spmat_descr) -> rocsparse_status;
+    pub fn rocsparse_create_const_bell_descr(
+        descr: *mut rocsparse_const_spmat_descr,
+        rows: i64,
+        cols: i64,
+        ell_block_dir: rocsparse_direction,
+        ell_block_dim: i64,
+        ell_cols: i64,
+        ell_col_ind: *const ::std::os::raw::c_void,
+        ell_val: *const ::std::os::raw::c_void,
+        idx_type: rocsparse_indextype,
+        idx_base: rocsparse_index_base,
+        data_type: rocsparse_datatype,
+    ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse COO matrix descriptor\n  \\details\n  \\p rocsparse_coo_get gets the fields of the sparse COO matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse COO matrix descriptor.\n  @param[out]\n  rows        number of rows in the sparse COO matrix.\n  @param[out]\n  cols        number of columns in the sparse COO matrix.\n  @param[out]\n  nnz         number of non-zeros in sparse COO matrix.\n  @param[out]\n  coo_row_ind row indices of the COO matrix (must be array of length \\p nnz ).\n  @param[out]\n  coo_col_ind column indices of the COO matrix (must be array of length \\p nnz ).\n  @param[out]\n  coo_val     values of the COO matrix (must be array of length \\p nnz ).\n  @param[out]\n  idx_type    \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p coo_row_ind or \\p coo_col_ind or \\p coo_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Destroy a sparse matrix descriptor\n\n  \\details\n  \\p rocsparse_destroy_spmat_descr destroys a sparse matrix descriptor and releases all\n  resources used by the descriptor.\n\n  @param[in]\n  descr   the matrix descriptor.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr is invalid."]
+    pub fn rocsparse_destroy_spmat_descr(descr: rocsparse_const_spmat_descr) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse COO matrix descriptor\n  \\details\n  \\p rocsparse_coo_get gets the fields of the sparse COO matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse COO matrix descriptor.\n  @param[out]\n  rows        number of rows in the sparse COO matrix.\n  @param[out]\n  cols        number of columns in the sparse COO matrix.\n  @param[out]\n  nnz         number of non-zeros in sparse COO matrix.\n  @param[out]\n  coo_row_ind row indices of the COO matrix (must be array of length \\p nnz ).\n  @param[out]\n  coo_col_ind column indices of the COO matrix (must be array of length \\p nnz ).\n  @param[out]\n  coo_val     values of the COO matrix (must be array of length \\p nnz ).\n  @param[out]\n  idx_type    \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p coo_row_ind or \\p coo_col_ind or \\p coo_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
     pub fn rocsparse_coo_get(
         descr: rocsparse_spmat_descr,
         rows: *mut i64,
@@ -1228,7 +1376,22 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse COO AoS matrix descriptor\n  \\details\n  \\p rocsparse_coo_aos_get gets the fields of the sparse COO AoS matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse COO AoS matrix descriptor.\n  @param[out]\n  rows        number of rows in the sparse COO AoS matrix.\n  @param[out]\n  cols        number of columns in the sparse COO AoS matrix.\n  @param[out]\n  nnz         number of non-zeros in sparse COO AoS matrix.\n  @param[out]\n  coo_ind     <row, columns> indices of the COO AoS matrix (must be array of length \\p nnz ).\n  @param[out]\n  coo_val     values of the COO AoS matrix (must be array of length \\p nnz ).\n  @param[out]\n  idx_type    \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p coo_ind or \\p coo_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
+    pub fn rocsparse_const_coo_get(
+        descr: rocsparse_const_spmat_descr,
+        rows: *mut i64,
+        cols: *mut i64,
+        nnz: *mut i64,
+        coo_row_ind: *mut *const ::std::os::raw::c_void,
+        coo_col_ind: *mut *const ::std::os::raw::c_void,
+        coo_val: *mut *const ::std::os::raw::c_void,
+        idx_type: *mut rocsparse_indextype,
+        idx_base: *mut rocsparse_index_base,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse COO AoS matrix descriptor\n  \\details\n  \\p rocsparse_coo_aos_get gets the fields of the sparse COO AoS matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse COO AoS matrix descriptor.\n  @param[out]\n  rows        number of rows in the sparse COO AoS matrix.\n  @param[out]\n  cols        number of columns in the sparse COO AoS matrix.\n  @param[out]\n  nnz         number of non-zeros in sparse COO AoS matrix.\n  @param[out]\n  coo_ind     <row, columns> indices of the COO AoS matrix (must be array of length \\p nnz ).\n  @param[out]\n  coo_val     values of the COO AoS matrix (must be array of length \\p nnz ).\n  @param[out]\n  idx_type    \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p coo_ind or \\p coo_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid.\n/\n/**@{"]
     pub fn rocsparse_coo_aos_get(
         descr: rocsparse_spmat_descr,
         rows: *mut i64,
@@ -1243,7 +1406,21 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse CSR matrix descriptor\n  \\details\n  \\p rocsparse_csr_get gets the fields of the sparse CSR matrix descriptor\n\n  @param[in]\n  descr        the pointer to the sparse CSR matrix descriptor.\n  @param[out]\n  rows         number of rows in the CSR matrix.\n  @param[out]\n  cols         number of columns in the CSR matrix\n  @param[out]\n  nnz          number of non-zeros in the CSR matrix.\n  @param[out]\n  csr_row_ptr  row offsets of the CSR matrix (must be array of length \\p rows+1 ).\n  @param[out]\n  csr_col_ind  column indices of the CSR matrix (must be array of length \\p nnz ).\n  @param[out]\n  csr_val      values of the CSR matrix (must be array of length \\p nnz ).\n  @param[out]\n  row_ptr_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  col_ind_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base     \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type    \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n               \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p csr_row_ptr or \\p csr_col_ind or \\p csr_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p row_ptr_type or \\p col_ind_type or \\p idx_base or \\p data_type is invalid."]
+    pub fn rocsparse_const_coo_aos_get(
+        descr: rocsparse_const_spmat_descr,
+        rows: *mut i64,
+        cols: *mut i64,
+        nnz: *mut i64,
+        coo_ind: *mut *const ::std::os::raw::c_void,
+        coo_val: *mut *const ::std::os::raw::c_void,
+        idx_type: *mut rocsparse_indextype,
+        idx_base: *mut rocsparse_index_base,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = "@}*/\n/*! \\ingroup aux_module\n  \\brief Get the fields of the sparse CSR matrix descriptor\n  \\details\n  \\p rocsparse_csr_get gets the fields of the sparse CSR matrix descriptor\n\n  @param[in]\n  descr        the pointer to the sparse CSR matrix descriptor.\n  @param[out]\n  rows         number of rows in the CSR matrix.\n  @param[out]\n  cols         number of columns in the CSR matrix\n  @param[out]\n  nnz          number of non-zeros in the CSR matrix.\n  @param[out]\n  csr_row_ptr  row offsets of the CSR matrix (must be array of length \\p rows+1 ).\n  @param[out]\n  csr_col_ind  column indices of the CSR matrix (must be array of length \\p nnz ).\n  @param[out]\n  csr_val      values of the CSR matrix (must be array of length \\p nnz ).\n  @param[out]\n  row_ptr_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  col_ind_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base     \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type    \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n               \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p csr_row_ptr or \\p csr_col_ind or \\p csr_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p row_ptr_type or \\p col_ind_type or \\p idx_base or \\p data_type is invalid.\n/\n/**@{"]
     pub fn rocsparse_csr_get(
         descr: rocsparse_spmat_descr,
         rows: *mut i64,
@@ -1260,7 +1437,56 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse ELL matrix descriptor\n  \\details\n  \\p rocsparse_ell_get gets the fields of the sparse ELL matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse ELL matrix descriptor.\n  @param[out]\n  rows        number of rows in the ELL matrix.\n  @param[out]\n  cols        number of columns in the ELL matrix\n  @param[out]\n  ell_col_ind column indices of the ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[out]\n  ell_val     values of the ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[out]\n  ell_width   width of the ELL matrix.\n  @param[out]\n  idx_type    \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p ell_col_ind or \\p ell_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p ell_width is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid."]
+    pub fn rocsparse_const_csr_get(
+        descr: rocsparse_const_spmat_descr,
+        rows: *mut i64,
+        cols: *mut i64,
+        nnz: *mut i64,
+        csr_row_ptr: *mut *const ::std::os::raw::c_void,
+        csr_col_ind: *mut *const ::std::os::raw::c_void,
+        csr_val: *mut *const ::std::os::raw::c_void,
+        row_ptr_type: *mut rocsparse_indextype,
+        col_ind_type: *mut rocsparse_indextype,
+        idx_base: *mut rocsparse_index_base,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse CSC matrix descriptor\n  \\details\n  \\p rocsparse_csc_get gets the fields of the sparse CSC matrix descriptor\n\n  @param[in]\n  descr        the pointer to the sparse CSC matrix descriptor.\n  @param[out]\n  rows         number of rows in the CSC matrix.\n  @param[out]\n  cols         number of columns in the CSC matrix\n  @param[out]\n  nnz          number of non-zeros in the CSC matrix.\n  @param[out]\n  csc_col_ptr  column offsets of the CSC matrix (must be array of length \\p cols+1 ).\n  @param[out]\n  csc_row_ind  row indices of the CSC matrix (must be array of length \\p nnz ).\n  @param[out]\n  csc_val      values of the CSC matrix (must be array of length \\p nnz ).\n  @param[out]\n  col_ptr_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  row_ind_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base     \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type    \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n               \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p csc_col_ptr or \\p csc_row_ind or \\p csc_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p col_ptr_type or \\p row_ind_type or \\p idx_base or \\p data_type is invalid.\n/\n/**@{"]
+    pub fn rocsparse_csc_get(
+        descr: rocsparse_spmat_descr,
+        rows: *mut i64,
+        cols: *mut i64,
+        nnz: *mut i64,
+        csc_col_ptr: *mut *mut ::std::os::raw::c_void,
+        csc_row_ind: *mut *mut ::std::os::raw::c_void,
+        csc_val: *mut *mut ::std::os::raw::c_void,
+        col_ptr_type: *mut rocsparse_indextype,
+        row_ind_type: *mut rocsparse_indextype,
+        idx_base: *mut rocsparse_index_base,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_const_csc_get(
+        descr: rocsparse_const_spmat_descr,
+        rows: *mut i64,
+        cols: *mut i64,
+        nnz: *mut i64,
+        csc_col_ptr: *mut *const ::std::os::raw::c_void,
+        csc_row_ind: *mut *const ::std::os::raw::c_void,
+        csc_val: *mut *const ::std::os::raw::c_void,
+        col_ptr_type: *mut rocsparse_indextype,
+        row_ind_type: *mut rocsparse_indextype,
+        idx_base: *mut rocsparse_index_base,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse ELL matrix descriptor\n  \\details\n  \\p rocsparse_ell_get gets the fields of the sparse ELL matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse ELL matrix descriptor.\n  @param[out]\n  rows        number of rows in the ELL matrix.\n  @param[out]\n  cols        number of columns in the ELL matrix\n  @param[out]\n  ell_col_ind column indices of the ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[out]\n  ell_val     values of the ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[out]\n  ell_width   width of the ELL matrix.\n  @param[out]\n  idx_type    \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p ell_col_ind or \\p ell_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p ell_width is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_type or \\p idx_base or \\p data_type is invalid.\n/\n/**@{"]
     pub fn rocsparse_ell_get(
         descr: rocsparse_spmat_descr,
         rows: *mut i64,
@@ -1275,7 +1501,21 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse blocked ELL matrix descriptor\n  \\details\n  \\p rocsparse_bell_get gets the fields of the sparse blocked ELL matrix descriptor\n\n  @param[in]\n  descr         the pointer to the sparse blocked ELL matrix descriptor.\n  @param[out]\n  rows          number of rows in the blocked ELL matrix.\n  @param[out]\n  cols          number of columns in the blocked ELL matrix\n  @param[out]\n  ell_block_dir \\ref rocsparse_direction_row or \\ref rocsparse_direction_column.\n  @param[out]\n  ell_block_dim block dimension of the sparse blocked ELL matrix.\n  @param[out]\n  ell_cols      column indices of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[out]\n  ell_col_ind   column indices of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[out]\n  ell_val       values of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[out]\n  idx_type      \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base      \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type     \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n                \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p ell_cols or \\p ell_col_ind or \\p ell_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p ell_block_dim is invalid.\n  \\retval rocsparse_status_invalid_value if \\p ell_block_dir or \\p idx_type or \\p idx_base or \\p data_type is invalid."]
+    pub fn rocsparse_const_ell_get(
+        descr: rocsparse_const_spmat_descr,
+        rows: *mut i64,
+        cols: *mut i64,
+        ell_col_ind: *mut *const ::std::os::raw::c_void,
+        ell_val: *mut *const ::std::os::raw::c_void,
+        ell_width: *mut i64,
+        idx_type: *mut rocsparse_indextype,
+        idx_base: *mut rocsparse_index_base,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse blocked ELL matrix descriptor\n  \\details\n  \\p rocsparse_bell_get gets the fields of the sparse blocked ELL matrix descriptor\n\n  @param[in]\n  descr         the pointer to the sparse blocked ELL matrix descriptor.\n  @param[out]\n  rows          number of rows in the blocked ELL matrix.\n  @param[out]\n  cols          number of columns in the blocked ELL matrix\n  @param[out]\n  ell_block_dir \\ref rocsparse_direction_row or \\ref rocsparse_direction_column.\n  @param[out]\n  ell_block_dim block dimension of the sparse blocked ELL matrix.\n  @param[out]\n  ell_cols      column indices of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[out]\n  ell_col_ind   column indices of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[out]\n  ell_val       values of the blocked ELL matrix (must be array of length \\p rows*ell_width ).\n  @param[out]\n  idx_type      \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base      \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type     \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n                \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p ell_cols or \\p ell_col_ind or \\p ell_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p ell_block_dim is invalid.\n  \\retval rocsparse_status_invalid_value if \\p ell_block_dir or \\p idx_type or \\p idx_base or \\p data_type is invalid.\n/\n/**@{"]
     pub fn rocsparse_bell_get(
         descr: rocsparse_spmat_descr,
         rows: *mut i64,
@@ -1290,6 +1530,59 @@ extern "C" {
         data_type: *mut rocsparse_datatype,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_const_bell_get(
+        descr: rocsparse_const_spmat_descr,
+        rows: *mut i64,
+        cols: *mut i64,
+        ell_block_dir: *mut rocsparse_direction,
+        ell_block_dim: *mut i64,
+        ell_cols: *mut i64,
+        ell_col_ind: *mut *const ::std::os::raw::c_void,
+        ell_val: *mut *const ::std::os::raw::c_void,
+        idx_type: *mut rocsparse_indextype,
+        idx_base: *mut rocsparse_index_base,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the sparse BSR matrix descriptor\n  \\details\n  \\p rocsparse_bsr_get gets the fields of the sparse BSR matrix descriptor\n\n  @param[in]\n  descr        the pointer to the sparse BSR matrix descriptor.\n  @param[out]\n  brows         number of rows in the BSR matrix.\n  @param[out]\n  bcols         number of columns in the BSR matrix\n  @param[out]\n  bnnz          number of non-zeros in the BSR matrix.\n  @param[out]\n  bdir          storage layout of the dense block matrices.\n  @param[out]\n  bdim          block dimension.\n  @param[out]\n  bsr_row_ptr  row offsets of the BSR matrix (must be array of length \\p brows+1 ).\n  @param[out]\n  bsr_col_ind  column indices of the BSR matrix (must be array of length \\p bnnz ).\n  @param[out]\n  bsr_val      values of the BSR matrix (must be array of length \\p bnnz * \\p bdim * \\p bdim ).\n  @param[out]\n  row_ptr_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  col_ind_type \\ref rocsparse_indextype_i32 or \\ref rocsparse_indextype_i64.\n  @param[out]\n  idx_base     \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[out]\n  data_type    \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n               \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p bsr_row_ptr or \\p bsr_col_ind or \\p bsr_val is invalid.\n  \\retval rocsparse_status_invalid_size if \\p brows or \\p bcols or \\p bnnz is invalid.\n  \\retval rocsparse_status_invalid_value if \\p row_ptr_type or \\p col_ind_type or \\p idx_base or \\p data_type is invalid.\n/\n/**@{"]
+    pub fn rocsparse_bsr_get(
+        descr: rocsparse_spmat_descr,
+        brows: *mut i64,
+        bcols: *mut i64,
+        bnnz: *mut i64,
+        bdir: *mut rocsparse_direction,
+        bdim: *mut i64,
+        bsr_row_ptr: *mut *mut ::std::os::raw::c_void,
+        bsr_col_ind: *mut *mut ::std::os::raw::c_void,
+        bsr_val: *mut *mut ::std::os::raw::c_void,
+        row_ptr_type: *mut rocsparse_indextype,
+        col_ind_type: *mut rocsparse_indextype,
+        idx_base: *mut rocsparse_index_base,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_const_bsr_get(
+        descr: rocsparse_const_spmat_descr,
+        brows: *mut i64,
+        bcols: *mut i64,
+        bnnz: *mut i64,
+        bdir: *mut rocsparse_direction,
+        bdim: *mut i64,
+        bsr_row_ptr: *mut *const ::std::os::raw::c_void,
+        bsr_col_ind: *mut *const ::std::os::raw::c_void,
+        bsr_val: *mut *const ::std::os::raw::c_void,
+        row_ptr_type: *mut rocsparse_indextype,
+        col_ind_type: *mut rocsparse_indextype,
+        idx_base: *mut rocsparse_index_base,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Set the row indices, column indices and values array in the sparse COO matrix descriptor\n\n  @param[inout]\n  descr   the pointer to the sparse vector descriptor.\n  @param[in]\n  coo_row_ind row indices of the COO matrix (must be array of length \\p nnz ).\n  @param[in]\n  coo_col_ind column indices of the COO matrix (must be array of length \\p nnz ).\n  @param[in]\n  coo_val     values of the COO matrix (must be array of length \\p nnz ).\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p coo_row_ind or \\p coo_col_ind or \\p coo_val is invalid."]
@@ -1352,7 +1645,7 @@ extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Get the number of rows, columns and non-zeros from the sparse matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse matrix descriptor.\n  @param[out]\n  rows        number of rows in the sparse matrix.\n  @param[out]\n  cols        number of columns in the sparse matrix.\n  @param[out]\n  nnz         number of non-zeros in sparse matrix.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p nnz is invalid."]
     pub fn rocsparse_spmat_get_size(
-        descr: rocsparse_spmat_descr,
+        descr: rocsparse_const_spmat_descr,
         rows: *mut i64,
         cols: *mut i64,
         nnz: *mut i64,
@@ -1362,7 +1655,7 @@ extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Get the sparse matrix format from the sparse matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse matrix descriptor.\n  @param[out]\n  format      \\ref rocsparse_format_coo or \\ref rocsparse_format_coo_aos or\n              \\ref rocsparse_format_csr or \\ref rocsparse_format_csc or\n              \\ref rocsparse_format_ell\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr is invalid.\n  \\retval rocsparse_status_invalid_value if \\p format is invalid."]
     pub fn rocsparse_spmat_get_format(
-        descr: rocsparse_spmat_descr,
+        descr: rocsparse_const_spmat_descr,
         format: *mut rocsparse_format,
     ) -> rocsparse_status;
 }
@@ -1370,18 +1663,25 @@ extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Get the sparse matrix index base from the sparse matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse matrix descriptor.\n  @param[out]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr is invalid.\n  \\retval rocsparse_status_invalid_value if \\p idx_base is invalid."]
     pub fn rocsparse_spmat_get_index_base(
-        descr: rocsparse_spmat_descr,
+        descr: rocsparse_const_spmat_descr,
         idx_base: *mut rocsparse_index_base,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the values array from the sparse matrix descriptor\n\n  @param[in]\n  descr     the pointer to the sparse matrix descriptor.\n  @param[out]\n  values    values array of the sparse matrix.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the values array from the sparse matrix descriptor\n\n  @param[in]\n  descr     the pointer to the sparse matrix descriptor.\n  @param[out]\n  values    values array of the sparse matrix.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid.\n/\n/**@{"]
     pub fn rocsparse_spmat_get_values(
         descr: rocsparse_spmat_descr,
         values: *mut *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_const_spmat_get_values(
+        descr: rocsparse_const_spmat_descr,
+        values: *mut *const ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Set the values array in the sparse matrix descriptor\n\n  @param[inout]\n  descr     the pointer to the sparse matrix descriptor.\n  @param[in]\n  values    values array of the sparse matrix.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid."]
@@ -1394,7 +1694,7 @@ extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Get the strided batch count from the sparse matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse matrix descriptor.\n  @param[out]\n  batch_count batch_count of the sparse matrix.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr is invalid.\n  \\retval rocsparse_status_invalid_size if \\p batch_count is invalid."]
     pub fn rocsparse_spmat_get_strided_batch(
-        descr: rocsparse_spmat_descr,
+        descr: rocsparse_const_spmat_descr,
         batch_count: *mut ::std::os::raw::c_int,
     ) -> rocsparse_status;
 }
@@ -1439,7 +1739,7 @@ extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Get the requested attribute data from the sparse matrix descriptor\n\n  @param[in]\n  descr       the pointer to the sparse matrix descriptor.\n  @param[in]\n  attribute \\ref rocsparse_spmat_fill_mode or \\ref rocsparse_spmat_diag_type or\n            \\ref rocsparse_spmat_matrix_type or \\ref rocsparse_spmat_storage_mode\n  @param[out]\n  data      attribute data\n  @param[in]\n  data_size attribute data size.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p data is invalid.\n  \\retval rocsparse_status_invalid_value if \\p attribute is invalid.\n  \\retval rocsparse_status_invalid_size if \\p data_size is invalid."]
     pub fn rocsparse_spmat_get_attribute(
-        descr: rocsparse_spmat_descr,
+        descr: rocsparse_const_spmat_descr,
         attribute: rocsparse_spmat_attribute,
         data: *mut ::std::os::raw::c_void,
         data_size: usize,
@@ -1457,7 +1757,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Create a dense vector descriptor\n  \\details\n  \\p rocsparse_create_dnvec_descr creates a dense vector descriptor. It should be\n  destroyed at the end using rocsparse_destroy_dnvec_descr().\n\n  @param[out]\n  descr   the pointer to the dense vector descriptor.\n  @param[in]\n  size   size of the dense vector.\n  @param[in]\n  values   non-zero values in the dense vector (must be array of length \\p size ).\n  @param[in]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p size is invalid.\n  \\retval rocsparse_status_invalid_value if \\p data_type is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Create a dense vector descriptor\n  \\details\n  \\p rocsparse_create_dnvec_descr creates a dense vector descriptor. It should be\n  destroyed at the end using rocsparse_destroy_dnvec_descr().\n\n  @param[out]\n  descr   the pointer to the dense vector descriptor.\n  @param[in]\n  size   size of the dense vector.\n  @param[in]\n  values   non-zero values in the dense vector (must be array of length \\p size ).\n  @param[in]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p size is invalid.\n  \\retval rocsparse_status_invalid_value if \\p data_type is invalid.\n/\n/**@{"]
     pub fn rocsparse_create_dnvec_descr(
         descr: *mut rocsparse_dnvec_descr,
         size: i64,
@@ -1467,12 +1767,21 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Destroy a dense vector descriptor\n\n  \\details\n  \\p rocsparse_destroy_dnvec_descr destroys a dense vector descriptor and releases all\n  resources used by the descriptor.\n\n  @param[in]\n  descr   the matrix descriptor.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr is invalid."]
-    pub fn rocsparse_destroy_dnvec_descr(descr: rocsparse_dnvec_descr) -> rocsparse_status;
+    pub fn rocsparse_create_const_dnvec_descr(
+        descr: *mut rocsparse_const_dnvec_descr,
+        size: i64,
+        values: *const ::std::os::raw::c_void,
+        data_type: rocsparse_datatype,
+    ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the dense vector descriptor\n  \\details\n  \\p rocsparse_dnvec_get gets the fields of the dense vector descriptor\n\n  @param[in]\n  descr   the pointer to the dense vector descriptor.\n  @param[out]\n  size   size of the dense vector.\n  @param[out]\n  values   non-zero values in the dense vector (must be array of length \\p size ).\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p size is invalid.\n  \\retval rocsparse_status_invalid_value if \\p data_type is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Destroy a dense vector descriptor\n\n  \\details\n  \\p rocsparse_destroy_dnvec_descr destroys a dense vector descriptor and releases all\n  resources used by the descriptor.\n\n  @param[in]\n  descr   the dense vector descriptor.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr is invalid."]
+    pub fn rocsparse_destroy_dnvec_descr(descr: rocsparse_const_dnvec_descr) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the dense vector descriptor\n  \\details\n  \\p rocsparse_dnvec_get gets the fields of the dense vector descriptor\n\n  @param[in]\n  descr   the pointer to the dense vector descriptor.\n  @param[out]\n  size   size of the dense vector.\n  @param[out]\n  values   non-zero values in the dense vector (must be array of length \\p size ).\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p size is invalid.\n  \\retval rocsparse_status_invalid_value if \\p data_type is invalid.\n/\n/**@{"]
     pub fn rocsparse_dnvec_get(
         descr: rocsparse_dnvec_descr,
         size: *mut i64,
@@ -1482,12 +1791,28 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the values array from a dense vector descriptor\n\n  @param[in]\n  descr   the matrix descriptor.\n  @param[out]\n  values   non-zero values in the dense vector (must be array of length \\p size ).\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr or \\p values is invalid."]
+    pub fn rocsparse_const_dnvec_get(
+        descr: rocsparse_const_dnvec_descr,
+        size: *mut i64,
+        values: *mut *const ::std::os::raw::c_void,
+        data_type: *mut rocsparse_datatype,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the values array from a dense vector descriptor\n\n  @param[in]\n  descr   the matrix descriptor.\n  @param[out]\n  values   non-zero values in the dense vector (must be array of length \\p size ).\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr or \\p values is invalid.\n/\n/**@{"]
     pub fn rocsparse_dnvec_get_values(
         descr: rocsparse_dnvec_descr,
         values: *mut *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_const_dnvec_get_values(
+        descr: rocsparse_const_dnvec_descr,
+        values: *mut *const ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Set the values array in a dense vector descriptor\n\n  @param[inout]\n  descr   the matrix descriptor.\n  @param[in]\n  values   non-zero values in the dense vector (must be array of length \\p size ).\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr or \\p values is invalid."]
@@ -1498,7 +1823,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Create a dense matrix descriptor\n  \\details\n  \\p rocsparse_create_dnmat_descr creates a dense matrix descriptor. It should be\n  destroyed at the end using rocsparse_destroy_dnmat_descr().\n\n  @param[out]\n  descr     the pointer to the dense matrix descriptor.\n  @param[in]\n  rows      number of rows in the dense matrix.\n  @param[in]\n  cols      number of columns in the dense matrix.\n  @param[in]\n  ld        leading dimension of the dense matrix.\n  @param[in]\n  values    non-zero values in the dense vector (must be array of length\n            \\p ld*rows if \\p order=rocsparse_order_column or \\p ld*cols if \\p order=rocsparse_order_row ).\n  @param[in]\n  data_type \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n            \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n  @param[in]\n  order     \\ref rocsparse_order_row or \\ref rocsparse_order_column.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p ld is invalid.\n  \\retval rocsparse_status_invalid_value if \\p data_type or \\p order is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Create a dense matrix descriptor\n  \\details\n  \\p rocsparse_create_dnmat_descr creates a dense matrix descriptor. It should be\n  destroyed at the end using rocsparse_destroy_dnmat_descr().\n\n  @param[out]\n  descr     the pointer to the dense matrix descriptor.\n  @param[in]\n  rows      number of rows in the dense matrix.\n  @param[in]\n  cols      number of columns in the dense matrix.\n  @param[in]\n  ld        leading dimension of the dense matrix.\n  @param[in]\n  values    non-zero values in the dense vector (must be array of length\n            \\p ld*rows if \\p order=rocsparse_order_column or \\p ld*cols if \\p order=rocsparse_order_row ).\n  @param[in]\n  data_type \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n            \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n  @param[in]\n  order     \\ref rocsparse_order_row or \\ref rocsparse_order_column.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p ld is invalid.\n  \\retval rocsparse_status_invalid_value if \\p data_type or \\p order is invalid.\n/\n/**@{"]
     pub fn rocsparse_create_dnmat_descr(
         descr: *mut rocsparse_dnmat_descr,
         rows: i64,
@@ -1511,12 +1836,24 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Destroy a dense matrix descriptor\n\n  \\details\n  \\p rocsparse_destroy_dnmat_descr destroys a dense matrix descriptor and releases all\n  resources used by the descriptor.\n\n  @param[in]\n  descr   the matrix descriptor.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr is invalid."]
-    pub fn rocsparse_destroy_dnmat_descr(descr: rocsparse_dnmat_descr) -> rocsparse_status;
+    pub fn rocsparse_create_const_dnmat_descr(
+        descr: *mut rocsparse_const_dnmat_descr,
+        rows: i64,
+        cols: i64,
+        ld: i64,
+        values: *const ::std::os::raw::c_void,
+        data_type: rocsparse_datatype,
+        order: rocsparse_order,
+    ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the dense matrix descriptor\n\n  @param[in]\n  descr   the pointer to the dense matrix descriptor.\n  @param[out]\n  rows   number of rows in the dense matrix.\n  @param[out]\n  cols   number of columns in the dense matrix.\n  @param[out]\n  ld        leading dimension of the dense matrix.\n  @param[out]\n  values    non-zero values in the dense matrix (must be array of length\n            \\p ld*rows if \\p order=rocsparse_order_column or \\p ld*cols if \\p order=rocsparse_order_row ).\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n  @param[out]\n  order     \\ref rocsparse_order_row or \\ref rocsparse_order_column.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p ld is invalid.\n  \\retval rocsparse_status_invalid_value if \\p data_type or \\p order is invalid."]
+    #[doc = " \\ingroup aux_module\n  \\brief Destroy a dense matrix descriptor\n\n  \\details\n  \\p rocsparse_destroy_dnmat_descr destroys a dense matrix descriptor and releases all\n  resources used by the descriptor.\n\n  @param[in]\n  descr   the matrix descriptor.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr is invalid."]
+    pub fn rocsparse_destroy_dnmat_descr(descr: rocsparse_const_dnmat_descr) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the fields of the dense matrix descriptor\n\n  @param[in]\n  descr   the pointer to the dense matrix descriptor.\n  @param[out]\n  rows   number of rows in the dense matrix.\n  @param[out]\n  cols   number of columns in the dense matrix.\n  @param[out]\n  ld        leading dimension of the dense matrix.\n  @param[out]\n  values    non-zero values in the dense matrix (must be array of length\n            \\p ld*rows if \\p order=rocsparse_order_column or \\p ld*cols if \\p order=rocsparse_order_row ).\n  @param[out]\n  data_type   \\ref rocsparse_datatype_f32_r, \\ref rocsparse_datatype_f64_r,\n              \\ref rocsparse_datatype_f32_c or \\ref rocsparse_datatype_f64_c.\n  @param[out]\n  order     \\ref rocsparse_order_row or \\ref rocsparse_order_column.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid.\n  \\retval rocsparse_status_invalid_size if \\p rows or \\p cols or \\p ld is invalid.\n  \\retval rocsparse_status_invalid_value if \\p data_type or \\p order is invalid.\n/\n/**@{"]
     pub fn rocsparse_dnmat_get(
         descr: rocsparse_dnmat_descr,
         rows: *mut i64,
@@ -1529,12 +1866,31 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup aux_module\n  \\brief Get the values array from the dense matrix descriptor\n\n  @param[in]\n  descr   the pointer to the dense matrix descriptor.\n  @param[out]\n  values    non-zero values in the dense matrix (must be array of length\n            \\p ld*rows if \\p order=rocsparse_order_column or \\p ld*cols if \\p order=rocsparse_order_row ).\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid."]
+    pub fn rocsparse_const_dnmat_get(
+        descr: rocsparse_const_dnmat_descr,
+        rows: *mut i64,
+        cols: *mut i64,
+        ld: *mut i64,
+        values: *mut *const ::std::os::raw::c_void,
+        data_type: *mut rocsparse_datatype,
+        order: *mut rocsparse_order,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup aux_module\n  \\brief Get the values array from the dense matrix descriptor\n\n  @param[in]\n  descr   the pointer to the dense matrix descriptor.\n  @param[out]\n  values    non-zero values in the dense matrix (must be array of length\n            \\p ld*rows if \\p order=rocsparse_order_column or \\p ld*cols if \\p order=rocsparse_order_row ).\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr or \\p values is invalid.\n/\n/**@{"]
     pub fn rocsparse_dnmat_get_values(
         descr: rocsparse_dnmat_descr,
         values: *mut *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_const_dnmat_get_values(
+        descr: rocsparse_const_dnmat_descr,
+        values: *mut *const ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Set the values array in a dense matrix descriptor\n\n  @param[inout]\n  descr   the matrix descriptor.\n  @param[in]\n  values    non-zero values in the dense matrix (must be array of length\n            \\p ld*rows if \\p order=rocsparse_order_column or \\p ld*cols if \\p order=rocsparse_order_row ).\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer \\p descr or \\p values is invalid."]
@@ -1547,7 +1903,7 @@ extern "C" {
     #[must_use]
     #[doc = " \\ingroup aux_module\n  \\brief Get the batch count and batch stride from the dense matrix descriptor\n\n  @param[in]\n  descr        the pointer to the dense matrix descriptor.\n  @param[out]\n  batch_count  the batch count in the dense matrix.\n  @param[out]\n  batch_stride the batch stride in the dense matrix.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_pointer if \\p descr is invalid.\n  \\retval rocsparse_status_invalid_size if \\p batch_count or \\p batch_stride is invalid."]
     pub fn rocsparse_dnmat_get_strided_batch(
-        descr: rocsparse_dnmat_descr,
+        descr: rocsparse_const_dnmat_descr,
         batch_count: *mut ::std::os::raw::c_int,
         batch_stride: *mut i64,
     ) -> rocsparse_status;
@@ -1562,3751 +1918,2347 @@ extern "C" {
     ) -> rocsparse_status;
 }
 extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_csr_buffer_size computes the required buffer size needed when calling \\p rocsparse_check_matrix_csr\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_csr(), rocsparse_dcheck_matrix_csr(),\n              rocsparse_ccheck_matrix_csr() and rocsparse_zcheck_matrix_csr().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval 
rocsparse_status_invalid_pointer \\p csr_val, \\p csr_row_ptr, \\p csr_col_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_csr_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
+    #[doc = " \\ingroup aux_module\n  \\brief Enable debug kernel launch.\n \\details If the debug kernel launch is enabled then hip errors are checked before and after every kernel launch.\n \\note This routine ignores the environment variable ROCSPARSE_DEBUG_KERNEL_LAUNCH."]
+    pub fn rocsparse_enable_debug_kernel_launch();
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Disable debug kernel launch.\n  \\note This routine ignores the environment variable ROCSPARSE_DEBUG_KERNEL_LAUNCH."]
+    pub fn rocsparse_disable_debug_kernel_launch();
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n \\return 1 if enabled, 0 otherwise."]
+    pub fn rocsparse_state_debug_kernel_launch() -> ::std::os::raw::c_int;
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Enable debug arguments.\n \\details If the debug arguments is enabled then argument descriptors are internally available when an argument checking occurs. It provide information to the user depending of the setup of the verbosity\n \\ref rocsparse_enable_debug_arguments_verbose, \\ref rocsparse_disable_debug_arguments_verbose and \\ref rocsparse_state_debug_arguments_verbose.\n \\note This routine ignores the environment variable ROCSPARSE_DEBUG_ARGUMENTS.\n \\note This routine enables debug arguments verbose with \\ref rocsparse_enable_debug_arguments_verbose."]
+    pub fn rocsparse_enable_debug_arguments();
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Disable debug arguments.\n  \\note This routine ignores the environment variable ROCSPARSE_DEBUG_ARGUMENTS.\n  \\note This routines disables debug arguments verbose."]
+    pub fn rocsparse_disable_debug_arguments();
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n \\return 1 if enabled, 0 otherwise."]
+    pub fn rocsparse_state_debug_arguments() -> ::std::os::raw::c_int;
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Enable debug arguments verbose.\n  \\details The debug argument verbose displays information related to argument descriptors created from argument checking failures.\n  \\note This routine ignores the environment variable ROCSPARSE_DEBUG_ARGUMENTS_VERBOSE)"]
+    pub fn rocsparse_enable_debug_arguments_verbose();
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Disable debug arguments verbose.\n  \\note This routine ignores the environment variable ROCSPARSE_DEBUG_ARGUMENTS_VERBOSE)"]
+    pub fn rocsparse_disable_debug_arguments_verbose();
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n \\brief Get state of debug arguments verbose.\n \\return 1 if enabled, 0 otherwise."]
+    pub fn rocsparse_state_debug_arguments_verbose() -> ::std::os::raw::c_int;
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Enable debug.\n \\details If the debug is enabled then code traces are generated when unsuccessful status returns occur. It provides information to the user depending of the set of the verbosity\n (\\ref rocsparse_enable_debug_verbose, \\ref rocsparse_disable_debug_verbose and \\ref rocsparse_state_debug_verbose).\n  \\note This routine ignores the environment variable ROCSPARSE_DEBUG.\n \\note \\ref rocsparse_enable_debug_verbose and \\ref rocsparse_enable_debug_arguments are called."]
+    pub fn rocsparse_enable_debug();
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Disable debug.\n  \\note This routine also disables debug arguments with \\ref rocsparse_disable_debug_arguments.\n  \\note This routine ignores the environment variable ROCSPARSE_DEBUG."]
+    pub fn rocsparse_disable_debug();
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n \\brief Get state of  debug.\n \\return 1 if enabled, 0 otherwise."]
+    pub fn rocsparse_state_debug() -> ::std::os::raw::c_int;
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Enable debug verbose.\n  \\details The debug verbose displays a stack of code traces showing where the code is handling a unsuccessful status.\n  \\note This routine enables debug arguments verbose with \\ref rocsparse_enable_debug_arguments_verbose.\n  \\note This routine ignores the environment variable ROCSPARSE_DEBUG_VERBOSE."]
+    pub fn rocsparse_enable_debug_verbose();
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n  \\brief Disable debug verbose.\n  \\note This routine disables debug arguments verbose with  \\ref rocsparse_disable_debug_arguments.\n  \\note This routine ignores the environment variable ROCSPARSE_DEBUG_VERBOSE."]
+    pub fn rocsparse_disable_debug_verbose();
+}
+extern "C" {
+    #[doc = " \\ingroup aux_module\n \\brief Get state of  debug verbose.\n \\return 1 if enabled, 0 otherwise."]
+    pub fn rocsparse_state_debug_verbose() -> ::std::os::raw::c_int;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dcheck_matrix_csr_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccheck_matrix_csr_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcheck_matrix_csr_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_csr checks if the input CSR matrix is valid.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p csr_val, \\p csr_row_ptr, \\p csr_col_ind, \\p temp_buffer or \\p data_status pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcheck_matrix_csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccheck_matrix_csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcheck_matrix_csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_coo_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_coo\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  coo_val     array of \\p nnz elements of the sparse COO matrix.\n  @param[in]\n  coo_row_ind array of \\p nnz elements containing the row indices of the sparse\n              COO matrix.\n  @param[in]\n  coo_col_ind array of \\p nnz elements containing the column indices of the sparse\n              COO matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_coo(), rocsparse_dcheck_matrix_coo(),\n              rocsparse_ccheck_matrix_coo() and rocsparse_zcheck_matrix_coo().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval 
rocsparse_status_invalid_pointer \\p coo_val, \\p coo_row_ind, \\p coo_col_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_coo_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_val: *const f32,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcheck_matrix_coo_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_val: *const f64,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccheck_matrix_coo_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_val: *const rocsparse_float_complex,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcheck_matrix_coo_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_val: *const rocsparse_double_complex,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_coo checks if the input COO matrix is valid.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse COO matrix.\n  @param[in]\n  n           number of columns of the sparse COO matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse COO matrix.\n  @param[in]\n  coo_val     array of \\p nnz elements of the sparse COO matrix.\n  @param[in]\n  coo_row_ind array of \\p nnz elements containing the row indices of the sparse\n              COO matrix.\n  @param[in]\n  coo_col_ind array of \\p nnz elements containing the column indices of the sparse\n              COO matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p coo_val, \\p coo_row_ind, \\p coo_col_ind, \\p temp_buffer or \\p data_status  pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_coo(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_val: *const f32,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcheck_matrix_coo(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_val: *const f64,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccheck_matrix_coo(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_val: *const rocsparse_float_complex,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcheck_matrix_coo(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_val: *const rocsparse_double_complex,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_gebsr_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_gebsr\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir          matrix storage of GEBSR blocks.\n  @param[in]\n  mb           number of block rows of the sparse GEBSR matrix.\n  @param[in]\n  nb           number of block columns of the sparse GEBSR matrix.\n  @param[in]\n  nnzb         number of non-zero blocks of the sparse GEBSR matrix.\n  @param[in]\n  row_block_dim row block dimension of the sparse GEBSR matrix.\n  @param[in]\n  col_block_dim column block dimension of the sparse GEBSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb elements of the sparse GEBSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every row of the\n              sparse GEBSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the column indices of the sparse\n              GEBSR matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_gebsr(), rocsparse_dcheck_matrix_gebsr(),\n              rocsparse_ccheck_matrix_gebsr() and rocsparse_zcheck_matrix_gebsr().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval 
rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p dir or \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p mb \\p nb \\p nnzb \\p row_block_dim or \\p col_block_dim is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p bsr_val, \\p bsr_row_ptr, \\p bsr_col_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_gebsr_buffer_size(
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse BSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_bsr2csr converts a BSR matrix into a CSR matrix. It is assumed,\n  that \\p csr_val, \\p csr_col_ind and \\p csr_row_ptr are allocated. Allocation size\n  for \\p csr_row_ptr is computed by the number of block rows multiplied by the block\n  dimension plus one. Allocation for \\p csr_val and \\p csr_col_ind is computed by the\n  the number of blocks in the BSR matrix multiplied by the block dimension squared.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n  @param[in]\n  mb          number of block rows in the sparse BSR matrix.\n  @param[in]\n  nb          number of block columns in the sparse BSR matrix.\n  @param[in]\n  bsr_descr   descriptor of the sparse BSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb*block_dim*block_dim containing the values of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse BSR matrix.\n  @param[in]\n  block_dim   size of the blocks in the sparse BSR matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val     array of \\p nnzb*block_dim*block_dim elements containing the values of the sparse CSR matrix.\n  @param[out]\n  csr_row_ptr array of \\p m+1 where \\p m=mb*block_dim elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[out]\n  csr_col_ind array of \\p nnzb*block_dim*block_dim elements containing the column indices of the sparse CSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb or \\p nb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p csr_val, \\p csr_row_ptr or\n              \\p csr_col_ind pointer is invalid.\n\n  \\par Example\n  This example converts a BSR matrix into an CSR matrix.\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      rocsparse_int mb   = 2;\n      rocsparse_int nb   = 3;\n      rocsparse_int block_dim = 2;\n      rocsparse_int m = Mb * block_dim;\n      rocsparse_int n = Nb * block_dim;\n\n      bsr_row_ptr[mb+1]                 = {0, 2, 5};                                                    // device memory\n      bsr_col_ind[nnzb]                 = {0, 1, 0, 1, 2};                                              // device memory\n      bsr_val[nnzb*block_dim*block_dim] = {1, 0, 4, 2, 0, 3, 0, 0, 5, 0, 0, 0, 0, 9, 7, 0, 8, 6, 0, 0}; // device memory\n\n      rocsparse_int nnzb = bsr_row_ptr[mb] - bsr_row_ptr[0];\n\n      // Create CSR arrays on device\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n      hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n      
hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnzb * block_dim * block_dim);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnzb * block_dim * block_dim);\n\n      // Create rocsparse handle\n      rocsparse_local_handle handle;\n\n      rocsparse_mat_descr bsr_descr = nullptr;\n      rocsparse_create_mat_descr(&bsr_descr);\n\n      rocsparse_mat_descr csr_descr = nullptr;\n      rocsparse_create_mat_descr(&csr_descr);\n\n      rocsparse_set_mat_index_base(bsr_descr, rocsparse_index_base_zero);\n      rocsparse_set_mat_index_base(csr_descr, rocsparse_index_base_zero);\n\n      // Format conversion\n      rocsparse_sbsr2csr(handle,\n                         rocsparse_direction_column,\n                         mb,\n                         nb,\n                         bsr_descr,\n                         bsr_val,\n                         bsr_row_ptr,\n                         bsr_col_ind,\n                         block_dim,\n                         csr_descr,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_sbsr2csr(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
         mb: rocsparse_int,
         nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
         bsr_val: *const f32,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        block_dim: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut f32,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dcheck_matrix_gebsr_buffer_size(
+    pub fn rocsparse_dbsr2csr(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
         mb: rocsparse_int,
         nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
         bsr_val: *const f64,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        block_dim: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut f64,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_ccheck_matrix_gebsr_buffer_size(
+    pub fn rocsparse_cbsr2csr(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
         mb: rocsparse_int,
         nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
         bsr_val: *const rocsparse_float_complex,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        block_dim: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut rocsparse_float_complex,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_zcheck_matrix_gebsr_buffer_size(
+    pub fn rocsparse_zbsr2csr(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
         mb: rocsparse_int,
         nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
         bsr_val: *const rocsparse_double_complex,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        block_dim: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut rocsparse_double_complex,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_gebsr checks if the input GEBSR matrix is valid.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir          matrix storage of GEBSR blocks.\n  @param[in]\n  mb           number of block rows of the sparse GEBSR matrix.\n  @param[in]\n  nb           number of block columns of the sparse GEBSR matrix.\n  @param[in]\n  nnzb         number of non-zero blocks of the sparse GEBSR matrix.\n  @param[in]\n  row_block_dim row block dimension of the sparse GEBSR matrix.\n  @param[in]\n  col_block_dim column block dimension of the sparse GEBSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb elements of the sparse GEBSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every row of the\n              sparse GEBSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the column indices of the sparse\n              GEBSR matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p dir or \\p idx_base or \\p matrix_type or \\p uplo or \\p 
storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p mb \\p nb \\p nnzb \\p row_block_dim or \\p col_block_dim is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p bsr_val, \\p bsr_row_ptr, \\p bsr_col_ind, \\p temp_buffer or \\p data_status pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_gebsr(
+    #[doc = " \\ingroup conv_module\n  \\brief Pads a value to the diagonal of the last block (if the last block is a diagonal block) in the sparse BSR matrix\n  when the matrix expands outside m x m\n\n  \\details When converting from a CSR matrix to a BSR matrix the resulting BSR matrix will be larger when m < mb * block_dim.\n  In these situations, the CSR to BSR conversion will expand the BSR matrix to have zeros when outside m x m. This routine\n  converts the resulting BSR matrix to one that has a value on the last diagonal blocks diagonal if this last block is a diagonal\n  block in the BSR matrix.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse BSR matrix.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  block_dim   block dimension of the sparse BSR matrix.\n  @param[in]\n  value       scalar value that is set on the diagonal of the last block when the matrix expands outside of \\p m x \\p m\n  @param[in]\n  bsr_descr   descriptor of the sparse BSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[inout]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p mb, \\p nnzb or \\p block_dim is\n              invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_descr, \\p bsr_val,\n              \\p bsr_row_ptr or \\p bsr_col_ind pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sbsrpad_value(
         handle: rocsparse_handle,
-        dir: rocsparse_direction,
+        m: rocsparse_int,
         mb: rocsparse_int,
-        nb: rocsparse_int,
         nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsr_val: *const f32,
+        block_dim: rocsparse_int,
+        value: f32,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut f32,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dcheck_matrix_gebsr(
+    pub fn rocsparse_dbsrpad_value(
         handle: rocsparse_handle,
-        dir: rocsparse_direction,
+        m: rocsparse_int,
         mb: rocsparse_int,
-        nb: rocsparse_int,
         nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsr_val: *const f64,
+        block_dim: rocsparse_int,
+        value: f64,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut f64,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_ccheck_matrix_gebsr(
+    pub fn rocsparse_cbsrpad_value(
         handle: rocsparse_handle,
-        dir: rocsparse_direction,
+        m: rocsparse_int,
         mb: rocsparse_int,
-        nb: rocsparse_int,
         nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsr_val: *const rocsparse_float_complex,
+        block_dim: rocsparse_int,
+        value: rocsparse_float_complex,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut rocsparse_float_complex,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_zcheck_matrix_gebsr(
+    pub fn rocsparse_zbsrpad_value(
         handle: rocsparse_handle,
-        dir: rocsparse_direction,
+        m: rocsparse_int,
         mb: rocsparse_int,
-        nb: rocsparse_int,
         nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsr_val: *const rocsparse_double_complex,
+        block_dim: rocsparse_int,
+        value: rocsparse_double_complex,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut rocsparse_double_complex,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_gebsc_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_gebsc\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir          matrix storage of GEBSC blocks.\n  @param[in]\n  mb           number of block rows of the sparse GEBSC matrix.\n  @param[in]\n  nb           number of block columns of the sparse GEBSC matrix.\n  @param[in]\n  nnzb         number of non-zero blocks of the sparse GEBSC matrix.\n  @param[in]\n  row_block_dim row block dimension of the sparse GEBSC matrix.\n  @param[in]\n  col_block_dim column block dimension of the sparse GEBSC matrix.\n  @param[in]\n  bsc_val     array of \\p nnzb elements of the sparse GEBSC matrix.\n  @param[in]\n  bsc_col_ptr array of \\p nb+1 elements that point to the start of every column of the\n              sparse GEBSC matrix.\n  @param[in]\n  bsc_row_ind array of \\p nnzb elements containing the row indices of the sparse\n              GEBSC matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_gebsc(), rocsparse_dcheck_matrix_gebsc(),\n              rocsparse_ccheck_matrix_gebsc() and rocsparse_zcheck_matrix_gebsc().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval 
rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p dir or \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p mb \\p nb \\p nnzb \\p row_block_dim or \\p col_block_dim is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p bsc_val, \\p bsc_col_ptr, \\p bsc_row_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_gebsc_buffer_size(
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse COO matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_coo2csr converts the COO array containing the row indices into a\n  CSR array of row offsets, that point to the start of every row.\n  It is assumed that the COO row index array is sorted.\n\n  \\note It can also be used, to convert a COO array containing the column indices into\n  a CSC array of column offsets, that point to the start of every column. Then, it is\n  assumed that the COO column index array is sorted, instead.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  coo_row_ind array of \\p nnz elements containing the row indices of the sparse COO\n              matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[out]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p coo_row_ind or \\p csr_row_ptr\n              pointer is invalid.\n\n  \\par Example\n  This example converts a COO matrix into a CSR matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 5;\n      rocsparse_int nnz = 8;\n\n   
   coo_row_ind[nnz] = {0, 0, 0, 1, 1, 2, 2, 2}; // device memory\n      coo_col_ind[nnz] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      coo_val[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8}; // device memory\n\n      // Allocate CSR matrix arrays\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n\n      hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnz);\n\n      // Convert the coo row indices into csr row offsets\n      rocsparse_coo2csr(handle,\n                        coo_row_ind,\n                        nnz,\n                        m,\n                        csr_row_ptr,\n                        rocsparse_index_base_zero);\n\n      // Copy the column and value arrays\n      hipMemcpy(csr_col_ind,\n                coo_col_ind,\n                sizeof(rocsparse_int) * nnz,\n                hipMemcpyDeviceToDevice);\n\n      hipMemcpy(csr_val,\n                coo_val,\n                sizeof(float) * nnz,\n                hipMemcpyDeviceToDevice);\n  \\endcode"]
+    pub fn rocsparse_coo2csr(
         handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *const f32,
-        bsc_col_ptr: *const rocsparse_int,
-        bsc_row_ind: *const rocsparse_int,
+        coo_row_ind: *const rocsparse_int,
+        nnz: rocsparse_int,
+        m: rocsparse_int,
+        csr_row_ptr: *mut rocsparse_int,
         idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dcheck_matrix_gebsc_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *const f64,
-        bsc_col_ptr: *const rocsparse_int,
-        bsc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccheck_matrix_gebsc_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *const rocsparse_float_complex,
-        bsc_col_ptr: *const rocsparse_int,
-        bsc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcheck_matrix_gebsc_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *const rocsparse_double_complex,
-        bsc_col_ptr: *const rocsparse_int,
-        bsc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_gebsc checks if the input GEBSC matrix is valid.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir          matrix storage of GEBSC blocks.\n  @param[in]\n  mb           number of block rows of the sparse GEBSC matrix.\n  @param[in]\n  nb           number of block columns of the sparse GEBSC matrix.\n  @param[in]\n  nnzb         number of non-zero blocks of the sparse GEBSC matrix.\n  @param[in]\n  row_block_dim row block dimension of the sparse GEBSC matrix.\n  @param[in]\n  col_block_dim column block dimension of the sparse GEBSC matrix.\n  @param[in]\n  bsc_val     array of \\p nnzb elements of the sparse GEBSC matrix.\n  @param[in]\n  bsc_col_ptr array of \\p nb+1 elements that point to the start of every column of the\n              sparse GEBSC matrix.\n  @param[in]\n  bsc_row_ind array of \\p nnzb elements containing the row indices of the sparse\n              GEBSC matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p dir or \\p idx_base or \\p matrix_type or \\p uplo or \\p 
storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p mb \\p nb \\p nnzb \\p row_block_dim or \\p col_block_dim is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p bsc_val, \\p bsc_col_ptr, \\p bsc_row_ind, \\p temp_buffer or \\p data_status pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_gebsc(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *const f32,
-        bsc_col_ptr: *const rocsparse_int,
-        bsc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcheck_matrix_gebsc(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *const f64,
-        bsc_col_ptr: *const rocsparse_int,
-        bsc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccheck_matrix_gebsc(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *const rocsparse_float_complex,
-        bsc_col_ptr: *const rocsparse_int,
-        bsc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcheck_matrix_gebsc(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *const rocsparse_double_complex,
-        bsc_col_ptr: *const rocsparse_int,
-        bsc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_csc_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_csc\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSC matrix.\n  @param[in]\n  n           number of columns of the sparse CSC matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSC matrix.\n  @param[in]\n  csc_val     array of \\p nnz elements of the sparse CSC matrix.\n  @param[in]\n  csc_col_ptr array of \\p m+1 elements that point to the start of every column of the\n              sparse CSC matrix.\n  @param[in]\n  csc_row_ind array of \\p nnz elements containing the row indices of the sparse\n              CSC matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_csc(), rocsparse_dcheck_matrix_csc(),\n              rocsparse_ccheck_matrix_csc() and rocsparse_zcheck_matrix_csc().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval 
rocsparse_status_invalid_pointer \\p csc_val, \\p csc_col_ptr, \\p csc_row_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_csc_buffer_size(
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the sparse matrix in COO format into a dense matrix.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  nnz         number of non-zero entries of the sparse COO matrix.\n  @param[in]\n  descr       the descriptor of the dense matrix \\p A, the supported matrix type is \\ref rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  coo_val     array of nnz nonzero elements of matrix \\p A.\n  @param[in]\n  coo_row_ind integer array of nnz row indices of the non-zero elements of matrix \\p A.\n\n  @param[in]\n  coo_col_ind integer array of nnz column indices of the non-zero elements of matrix \\p A.\n  @param[out]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld          leading dimension of dense array \\p A.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p nnz or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p coo_val \\p coo_col_ind or \\p coo_row_ind\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scoo2dense(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
         nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        coo_val: *const f32,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        A: *mut f32,
+        ld: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcoo2dense(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        coo_val: *const f64,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        A: *mut f64,
+        ld: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccoo2dense(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        coo_val: *const rocsparse_float_complex,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        A: *mut rocsparse_float_complex,
+        ld: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcoo2dense(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        coo_val: *const rocsparse_double_complex,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        A: *mut rocsparse_double_complex,
+        ld: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse COO matrix\n\n  \\details\n  \\p coosort_buffer_size returns the size of the temporary storage buffer that is\n  required by rocsparse_coosort_by_row() and rocsparse_coosort_by_column(). The\n  temporary storage buffer has to be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse COO matrix.\n  @param[in]\n  n               number of columns of the sparse COO matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse COO matrix.\n  @param[in]\n  coo_row_ind     array of \\p nnz elements containing the row indices of the sparse\n                  COO matrix.\n  @param[in]\n  coo_col_ind     array of \\p nnz elements containing the column indices of the sparse\n                  COO matrix.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_coosort_by_row() and rocsparse_coosort_by_column().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p coo_row_ind, \\p coo_col_ind or\n              \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_coosort_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse COO matrix by row\n\n  \\details\n  \\p rocsparse_coosort_by_row sorts a matrix in COO format by row. The sorted\n  permutation vector \\p perm can be used to obtain sorted \\p coo_val array. In this\n  case, \\p perm must be initialized as the identity permutation, see\n  rocsparse_create_identity_permutation().\n\n  \\p rocsparse_coosort_by_row requires extra temporary storage buffer that has to be\n  allocated by the user. Storage buffer size can be determined by\n  rocsparse_coosort_buffer_size().\n\n  \\note\n  \\p perm can be \\p NULL if a sorted permutation vector is not required.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse COO matrix.\n  @param[in]\n  n               number of columns of the sparse COO matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse COO matrix.\n  @param[inout]\n  coo_row_ind     array of \\p nnz elements containing the row indices of the sparse\n                  COO matrix.\n  @param[inout]\n  coo_col_ind     array of \\p nnz elements containing the column indices of the sparse\n                  COO matrix.\n  @param[inout]\n  perm            array of \\p nnz integers containing the unsorted map indices, can be\n                  \\p NULL.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned by\n                  rocsparse_coosort_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p 
n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p coo_row_ind, \\p coo_col_ind or\n              \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n  \\par Example\n  The following example sorts a \\f$3 \\times 3\\f$ COO matrix by row indices.\n  \\code{.c}\n      //     1 2 3\n      // A = 4 5 6\n      //     7 8 9\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 3;\n      rocsparse_int nnz = 9;\n\n      coo_row_ind[nnz] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; // device memory\n      coo_col_ind[nnz] = {0, 0, 0, 1, 1, 1, 2, 2, 2}; // device memory\n      coo_val[nnz]     = {1, 4, 7, 2, 5, 8, 3, 6, 9}; // device memory\n\n      // Create permutation vector perm as the identity map\n      rocsparse_int* perm;\n      hipMalloc((void**)&perm, sizeof(rocsparse_int) * nnz);\n      rocsparse_create_identity_permutation(handle, nnz, perm);\n\n      // Allocate temporary buffer\n      size_t buffer_size;\n      void* temp_buffer;\n      rocsparse_coosort_buffer_size(handle,\n                                    m,\n                                    n,\n                                    nnz,\n                                    coo_row_ind,\n                                    coo_col_ind,\n                                    &buffer_size);\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Sort the COO matrix\n      rocsparse_coosort_by_row(handle,\n                               m,\n                               n,\n                               nnz,\n                               coo_row_ind,\n                               coo_col_ind,\n                               perm,\n                               temp_buffer);\n\n      // Gather sorted coo_val array\n      float* coo_val_sorted;\n      hipMalloc((void**)&coo_val_sorted, sizeof(float) * nnz);\n      rocsparse_sgthr(handle, nnz, coo_val, coo_val_sorted, perm, rocsparse_index_base_zero);\n\n      // Clean up\n     
 hipFree(temp_buffer);\n      hipFree(perm);\n      hipFree(coo_val);\n  \\endcode"]
+    pub fn rocsparse_coosort_by_row(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_row_ind: *mut rocsparse_int,
+        coo_col_ind: *mut rocsparse_int,
+        perm: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse COO matrix by column\n\n  \\details\n  \\p rocsparse_coosort_by_column sorts a matrix in COO format by column. The sorted\n  permutation vector \\p perm can be used to obtain sorted \\p coo_val array. In this\n  case, \\p perm must be initialized as the identity permutation, see\n  rocsparse_create_identity_permutation().\n\n  \\p rocsparse_coosort_by_column requires extra temporary storage buffer that has to be\n  allocated by the user. Storage buffer size can be determined by\n  rocsparse_coosort_buffer_size().\n\n  \\note\n  \\p perm can be \\p NULL if a sorted permutation vector is not required.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse COO matrix.\n  @param[in]\n  n               number of columns of the sparse COO matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse COO matrix.\n  @param[inout]\n  coo_row_ind     array of \\p nnz elements containing the row indices of the sparse\n                  COO matrix.\n  @param[inout]\n  coo_col_ind     array of \\p nnz elements containing the column indices of the sparse\n                  COO matrix.\n  @param[inout]\n  perm            array of \\p nnz integers containing the unsorted map indices, can be\n                  \\p NULL.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned by\n                  rocsparse_coosort_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     
rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p coo_row_ind, \\p coo_col_ind or\n              \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n  \\par Example\n  The following example sorts a \\f$3 \\times 3\\f$ COO matrix by column indices.\n  \\code{.c}\n      //     1 2 3\n      // A = 4 5 6\n      //     7 8 9\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 3;\n      rocsparse_int nnz = 9;\n\n      coo_row_ind[nnz] = {0, 0, 0, 1, 1, 1, 2, 2, 2}; // device memory\n      coo_col_ind[nnz] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; // device memory\n      coo_val[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8, 9}; // device memory\n\n      // Create permutation vector perm as the identity map\n      rocsparse_int* perm;\n      hipMalloc((void**)&perm, sizeof(rocsparse_int) * nnz);\n      rocsparse_create_identity_permutation(handle, nnz, perm);\n\n      // Allocate temporary buffer\n      size_t buffer_size;\n      void* temp_buffer;\n      rocsparse_coosort_buffer_size(handle,\n                                    m,\n                                    n,\n                                    nnz,\n                                    coo_row_ind,\n                                    coo_col_ind,\n                                    &buffer_size);\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Sort the COO matrix\n      rocsparse_coosort_by_column(handle,\n                                  m,\n                                  n,\n                                  nnz,\n                                  coo_row_ind,\n                                  coo_col_ind,\n                                  perm,\n                                  temp_buffer);\n\n      // Gather sorted coo_val array\n      float* coo_val_sorted;\n      hipMalloc((void**)&coo_val_sorted, sizeof(float) * nnz);\n      rocsparse_sgthr(handle, nnz, coo_val, 
coo_val_sorted, perm, rocsparse_index_base_zero);\n\n      // Clean up\n      hipFree(temp_buffer);\n      hipFree(perm);\n      hipFree(coo_val);\n  \\endcode"]
+    pub fn rocsparse_coosort_by_column(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_row_ind: *mut rocsparse_int,
+        coo_col_ind: *mut rocsparse_int,
+        perm: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the sparse matrix in CSC format into a dense matrix.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr       the descriptor of the dense matrix \\p A, the supported matrix type is \\ref rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  csc_val     array of nnz ( = \\p csc_col_ptr[n] - \\p csc_col_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[in]\n  csc_col_ptr integer array of n+1 elements that contains the start of every column and the end of the last column plus one.\n  @param[in]\n  csc_row_ind integer array of nnz ( = \\p csc_col_ptr[n] - csc_col_ptr[0] ) row indices of the non-zero elements of matrix \\p A.\n\n  @param[out]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld          leading dimension of dense array \\p A.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p csc_val \\p csc_col_ptr or \\p csc_row_ind\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scsc2dense(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
         csc_val: *const f32,
         csc_col_ptr: *const rocsparse_int,
         csc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        A: *mut f32,
+        ld: rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dcheck_matrix_csc_buffer_size(
+    pub fn rocsparse_dcsc2dense(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
-        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
         csc_val: *const f64,
         csc_col_ptr: *const rocsparse_int,
         csc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        A: *mut f64,
+        ld: rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_ccheck_matrix_csc_buffer_size(
+    pub fn rocsparse_ccsc2dense(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
-        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
         csc_val: *const rocsparse_float_complex,
         csc_col_ptr: *const rocsparse_int,
         csc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        A: *mut rocsparse_float_complex,
+        ld: rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_zcheck_matrix_csc_buffer_size(
+    pub fn rocsparse_zcsc2dense(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
-        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
         csc_val: *const rocsparse_double_complex,
         csc_col_ptr: *const rocsparse_int,
         csc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
+        A: *mut rocsparse_double_complex,
+        ld: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse CSC matrix\n\n  \\details\n  \\p rocsparse_cscsort_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_cscsort(). The temporary storage buffer must be allocated by\n  the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSC matrix.\n  @param[in]\n  n               number of columns of the sparse CSC matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse CSC matrix.\n  @param[in]\n  csc_col_ptr     array of \\p n+1 elements that point to the start of every column of\n                  the sparse CSC matrix.\n  @param[in]\n  csc_row_ind     array of \\p nnz elements containing the row indices of the sparse\n                  CSC matrix.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_cscsort().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csc_col_ptr, \\p csc_row_ind or\n              \\p buffer_size pointer is invalid."]
+    pub fn rocsparse_cscsort_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csc_col_ptr: *const rocsparse_int,
+        csc_row_ind: *const rocsparse_int,
         buffer_size: *mut usize,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_csc checks if the input CSC matrix is valid.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSC matrix.\n  @param[in]\n  n           number of columns of the sparse CSC matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSC matrix.\n  @param[in]\n  csc_val     array of \\p nnz elements of the sparse CSC matrix.\n  @param[in]\n  csc_col_ptr array of \\p m+1 elements that point to the start of every column of the\n              sparse CSC matrix.\n  @param[in]\n  csc_row_ind array of \\p nnz elements containing the row indices of the sparse\n              CSC matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p csc_val, \\p csc_col_ptr, \\p csc_row_ind, \\p temp_buffer or \\p data_status pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_csc(
+    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse CSC matrix\n\n  \\details\n  \\p rocsparse_cscsort sorts a matrix in CSC format. The sorted permutation vector\n  \\p perm can be used to obtain sorted \\p csc_val array. In this case, \\p perm must be\n  initialized as the identity permutation, see rocsparse_create_identity_permutation().\n\n  \\p rocsparse_cscsort requires extra temporary storage buffer that has to be allocated by\n  the user. Storage buffer size can be determined by rocsparse_cscsort_buffer_size().\n\n  \\note\n  \\p perm can be \\p NULL if a sorted permutation vector is not required.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSC matrix.\n  @param[in]\n  n               number of columns of the sparse CSC matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse CSC matrix.\n  @param[in]\n  descr           descriptor of the sparse CSC matrix. 
Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csc_col_ptr     array of \\p n+1 elements that point to the start of every column of\n                  the sparse CSC matrix.\n  @param[inout]\n  csc_row_ind     array of \\p nnz elements containing the row indices of the sparse\n                  CSC matrix.\n  @param[inout]\n  perm            array of \\p nnz integers containing the unsorted map indices, can be\n                  \\p NULL.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned by\n                  rocsparse_cscsort_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csc_col_ptr, \\p csc_row_ind\n              or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  The following example sorts a \\f$3 \\times 3\\f$ CSC matrix.\n  \\code{.c}\n      //     1 2 3\n      // A = 4 5 6\n      //     7 8 9\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 3;\n      rocsparse_int nnz = 9;\n\n      csc_col_ptr[m + 1] = {0, 3, 6, 9};                // device memory\n      csc_row_ind[nnz]   = {2, 0, 1, 0, 1, 2, 0, 2, 1}; // device memory\n      csc_val[nnz]       = {7, 1, 4, 2, 5, 8, 3, 9, 6}; // device memory\n\n      // Create permutation vector perm as the identity map\n      rocsparse_int* perm;\n      hipMalloc((void**)&perm, sizeof(rocsparse_int) * nnz);\n      rocsparse_create_identity_permutation(handle, nnz, perm);\n\n      // Allocate temporary buffer\n      size_t 
buffer_size;\n      void* temp_buffer;\n      rocsparse_cscsort_buffer_size(handle, m, n, nnz, csc_col_ptr, csc_row_ind, &buffer_size);\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Sort the CSC matrix\n      rocsparse_cscsort(handle, m, n, nnz, descr, csc_col_ptr, csc_row_ind, perm, temp_buffer);\n\n      // Gather sorted csc_val array\n      float* csc_val_sorted;\n      hipMalloc((void**)&csc_val_sorted, sizeof(float) * nnz);\n      rocsparse_sgthr(handle, nnz, csc_val, csc_val_sorted, perm, rocsparse_index_base_zero);\n\n      // Clean up\n      hipFree(temp_buffer);\n      hipFree(perm);\n      hipFree(csc_val);\n  \\endcode"]
+    pub fn rocsparse_cscsort(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
         nnz: rocsparse_int,
-        csc_val: *const f32,
+        descr: rocsparse_mat_descr,
         csc_col_ptr: *const rocsparse_int,
-        csc_row_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
+        csc_row_ind: *mut rocsparse_int,
+        perm: *mut rocsparse_int,
         temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dcheck_matrix_csc(
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the number of nonzero block columns per row and the total number of nonzero blocks in a sparse\n  BSR matrix given a sparse CSR matrix as input.\n\n  \\details\n  The routine does support asynchronous execution if the pointer mode is set to device.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         direction that specifies whether to count nonzero elements by \\ref rocsparse_direction_row or by\n              \\ref rocsparse_direction_column.\n\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n\n  @param[in]\n  csr_descr    descriptor of the sparse CSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_row_ptr integer array containing \\p m+1 elements that point to the start of each row of the CSR matrix\n\n  @param[in]\n  csr_col_ind integer array of the column indices for each non-zero element in the CSR matrix\n\n  @param[in]\n  block_dim   the block dimension of the BSR matrix. Between 1 and min(m, n)\n\n  @param[in]\n  bsr_descr    descriptor of the sparse BSR matrix. 
Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_row_ptr integer array containing \\p mb+1 elements that point to the start of each block row of the BSR matrix\n\n  @param[out]\n  bsr_nnz     total number of nonzero blocks in device or host memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr or \\p csr_col_ind or \\p bsr_row_ptr or \\p bsr_nnz\n              pointer is invalid."]
+    pub fn rocsparse_csr2bsr_nnz(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_row_ptr: *mut rocsparse_int,
+        bsr_nnz: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse BSR matrix\n\n  \\details\n  \\p rocsparse_csr2bsr converts a CSR matrix into a BSR matrix. It is assumed,\n  that \\p bsr_val, \\p bsr_col_ind and \\p bsr_row_ptr are allocated. Allocation size\n  for \\p bsr_row_ptr is computed as \\p mb+1 where \\p mb is the number of block rows in\n  the BSR matrix. Allocation size for \\p bsr_val and \\p bsr_col_ind is computed using\n  \\p csr2bsr_nnz() which also fills in \\p bsr_row_ptr.\n\n  \\p rocsparse_csr2bsr requires extra temporary storage that is allocated internally if \\p block_dim>16\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  dir          the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n  @param[in]\n  m            number of rows in the sparse CSR matrix.\n  @param[in]\n  n            number of columns in the sparse CSR matrix.\n  @param[in]\n  csr_descr    descriptor of the sparse CSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val      array of \\p nnz elements containing the values of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr  array of \\p m+1 elements that point to the start of every row of the\n               sparse CSR matrix.\n  @param[in]\n  csr_col_ind  array of \\p nnz elements containing the column indices of the sparse CSR matrix.\n  @param[in]\n  block_dim    size of the blocks in the sparse BSR matrix.\n  @param[in]\n  bsr_descr    descriptor of the sparse BSR matrix. 
Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_val      array of \\p nnzb*block_dim*block_dim containing the values of the sparse BSR matrix.\n  @param[out]\n  bsr_row_ptr  array of \\p mb+1 elements that point to the start of every block row of the\n               sparse BSR matrix.\n  @param[out]\n  bsr_col_ind  array of \\p nnzb elements containing the block column indices of the sparse BSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p csr_val, \\p csr_row_ptr or\n              \\p csr_col_ind pointer is invalid.\n\n  \\par Example\n  This example converts a CSR matrix into an BSR matrix.\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      rocsparse_int m   = 4;\n      rocsparse_int n   = 6;\n      rocsparse_int block_dim = 2;\n      rocsparse_int nnz = 9;\n      rocsparse_int mb = (m + block_dim - 1) / block_dim;\n      rocsparse_int nb = (n + block_dim - 1) / block_dim;\n\n      csr_row_ptr[m+1]  = {0, 2, 4, 7, 9};             // device memory\n      csr_col_ind[nnz]  = {0, 1, 1, 2, 0, 3, 4, 2, 4}; // device memory\n      csr_val[nnz]      = {1, 4, 2, 3, 5, 7, 8, 9, 6}; // device memory\n\n      hipMalloc(&bsr_row_ptr, sizeof(rocsparse_int) *(mb + 1));\n      rocsparse_int nnzb;\n      rocsparse_int* nnzTotalHostPtr = &nnzb;\n      csr2bsr_nnz(handle,\n                  rocsparse_direction_row,\n                  m,\n                  n,\n                  csr_descr,\n                  csr_row_ptr,\n                  csr_col_ind,\n                  block_dim,\n                  bsr_descr,\n           
       bsr_row_ptr,\n                  nnzTotalHostPtr);\n      nnzb = *nnzTotalHostPtr;\n      hipMalloc(&bsr_col_ind, sizeof(int)*nnzb);\n      hipMalloc(&bsr_val, sizeof(float)*(block_dim * block_dim) * nnzb);\n      scsr2bsr(handle,\n               rocsparse_direction_row,\n               m,\n               n,\n               csr_descr,\n               csr_val,\n               csr_row_ptr,\n               csr_col_ind,\n               block_dim,\n               bsr_descr,\n               bsr_val,\n               bsr_row_ptr,\n               bsr_col_ind);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsr2bsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut f32,
+        bsr_row_ptr: *mut rocsparse_int,
+        bsr_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcsr2bsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const f64,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut f64,
+        bsr_row_ptr: *mut rocsparse_int,
+        bsr_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccsr2bsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut rocsparse_float_complex,
+        bsr_row_ptr: *mut rocsparse_int,
+        bsr_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcsr2bsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut rocsparse_double_complex,
+        bsr_row_ptr: *mut rocsparse_int,
+        bsr_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse COO matrix\n\n  \\details\n  \\p rocsparse_csr2coo converts the CSR array containing the row offsets, that point\n  to the start of every row, into a COO array of row indices.\n\n  \\note\n  It can also be used to convert a CSC array containing the column offsets into a COO\n  array of column indices.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row\n              of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[out]\n  coo_row_ind array of \\p nnz elements containing the row indices of the sparse COO\n              matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr or \\p coo_row_ind\n              pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n\n  \\par Example\n  This example converts a CSR matrix into a COO matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 5;\n      rocsparse_int nnz = 8;\n\n      csr_row_ptr[m+1] = {0, 3, 5, 8};             // device memory\n      csr_col_ind[nnz] = 
{0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      csr_val[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8}; // device memory\n\n      // Allocate COO matrix arrays\n      rocsparse_int* coo_row_ind;\n      rocsparse_int* coo_col_ind;\n      float* coo_val;\n\n      hipMalloc((void**)&coo_row_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&coo_col_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&coo_val, sizeof(float) * nnz);\n\n      // Convert the csr row offsets into coo row indices\n      rocsparse_csr2coo(handle,\n                        csr_row_ptr,\n                        nnz,\n                        m,\n                        coo_row_ind,\n                        rocsparse_index_base_zero);\n\n      // Copy the column and value arrays\n      hipMemcpy(coo_col_ind,\n                csr_col_ind,\n                sizeof(rocsparse_int) * nnz,\n                hipMemcpyDeviceToDevice);\n\n      hipMemcpy(coo_val,\n                csr_val,\n                sizeof(float) * nnz,\n                hipMemcpyDeviceToDevice);\n  \\endcode"]
+    pub fn rocsparse_csr2coo(
+        handle: rocsparse_handle,
+        csr_row_ptr: *const rocsparse_int,
+        nnz: rocsparse_int,
+        m: rocsparse_int,
+        coo_row_ind: *mut rocsparse_int,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse CSC matrix\n\n  \\details\n  \\p rocsparse_csr2csc_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_scsr2csc(), rocsparse_dcsr2csc(), rocsparse_ccsr2csc() and\n  rocsparse_zcsr2csc(). The temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  copy_values \\ref rocsparse_action_symbolic or \\ref rocsparse_action_numeric.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scsr2csc(), rocsparse_dcsr2csc(), rocsparse_ccsr2csc() and\n              rocsparse_zcsr2csc().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr, \\p csr_col_ind or\n              \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_csr2csc_buffer_size(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
         nnz: rocsparse_int,
-        csc_val: *const f64,
-        csc_col_ptr: *const rocsparse_int,
-        csc_row_ind: *const rocsparse_int,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        copy_values: rocsparse_action,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse CSC matrix\n\n  \\details\n  \\p rocsparse_csr2csc converts a CSR matrix into a CSC matrix. \\p rocsparse_csr2csc\n  can also be used to convert a CSC matrix into a CSR matrix. \\p copy_values decides\n  whether \\p csc_val is being filled during conversion (\\ref rocsparse_action_numeric)\n  or not (\\ref rocsparse_action_symbolic).\n\n  \\p rocsparse_csr2csc requires extra temporary storage buffer that has to be allocated\n  by the user. Storage buffer size can be determined by rocsparse_csr2csc_buffer_size().\n\n  \\note\n  The resulting matrix can also be seen as the transpose of the input matrix.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  csc_val     array of \\p nnz elements of the sparse CSC matrix.\n  @param[out]\n  csc_row_ind array of \\p nnz elements containing the row indices of the sparse CSC\n              matrix.\n  @param[out]\n  csc_col_ptr array of \\p n+1 elements that point to the start of every column of the\n              sparse CSC matrix.\n  @param[in]\n  copy_values \\ref rocsparse_action_symbolic or \\ref 
rocsparse_action_numeric.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user, size is returned by\n              rocsparse_csr2csc_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_val, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p csc_val, \\p csc_row_ind, \\p csc_col_ptr or\n              \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n  \\par Example\n  This example computes the transpose of a CSR matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m_A   = 3;\n      rocsparse_int n_A   = 5;\n      rocsparse_int nnz_A = 8;\n\n      csr_row_ptr_A[m+1] = {0, 3, 5, 8};             // device memory\n      csr_col_ind_A[nnz] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      csr_val_A[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8}; // device memory\n\n      // Allocate memory for transposed CSR matrix\n      rocsparse_int m_T   = n_A;\n      rocsparse_int n_T   = m_A;\n      rocsparse_int nnz_T = nnz_A;\n\n      rocsparse_int* csr_row_ptr_T;\n      rocsparse_int* csr_col_ind_T;\n      float* csr_val_T;\n\n      hipMalloc((void**)&csr_row_ptr_T, sizeof(rocsparse_int) * (m_T + 1));\n      hipMalloc((void**)&csr_col_ind_T, sizeof(rocsparse_int) * nnz_T);\n      hipMalloc((void**)&csr_val_T, sizeof(float) * nnz_T);\n\n      // Obtain the temporary buffer size\n      size_t buffer_size;\n      rocsparse_csr2csc_buffer_size(handle,\n                                    m_A,\n            
                        n_A,\n                                    nnz_A,\n                                    csr_row_ptr_A,\n                                    csr_col_ind_A,\n                                    rocsparse_action_numeric,\n                                    &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      rocsparse_scsr2csc(handle,\n                         m_A,\n                         n_A,\n                         nnz_A,\n                         csr_val_A,\n                         csr_row_ptr_A,\n                         csr_col_ind_A,\n                         csr_val_T,\n                         csr_col_ind_T,\n                         csr_row_ptr_T,\n                         rocsparse_action_numeric,\n                         rocsparse_index_base_zero,\n                         temp_buffer);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsr2csc(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        csc_val: *mut f32,
+        csc_row_ind: *mut rocsparse_int,
+        csc_col_ptr: *mut rocsparse_int,
+        copy_values: rocsparse_action,
         idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
         temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_ccheck_matrix_csc(
+    pub fn rocsparse_dcsr2csc(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
         nnz: rocsparse_int,
-        csc_val: *const rocsparse_float_complex,
-        csc_col_ptr: *const rocsparse_int,
-        csc_row_ind: *const rocsparse_int,
+        csr_val: *const f64,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        csc_val: *mut f64,
+        csc_row_ind: *mut rocsparse_int,
+        csc_col_ptr: *mut rocsparse_int,
+        copy_values: rocsparse_action,
         idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
         temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_zcheck_matrix_csc(
+    pub fn rocsparse_ccsr2csc(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
         nnz: rocsparse_int,
-        csc_val: *const rocsparse_double_complex,
-        csc_col_ptr: *const rocsparse_int,
-        csc_row_ind: *const rocsparse_int,
+        csr_val: *const rocsparse_float_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        csc_val: *mut rocsparse_float_complex,
+        csc_row_ind: *mut rocsparse_int,
+        csc_col_ptr: *mut rocsparse_int,
+        copy_values: rocsparse_action,
         idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
         temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_ell_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_ell\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse ELL matrix.\n  @param[in]\n  n           number of columns of the sparse ELL matrix.\n  @param[in]\n  ell_width   number of non-zero elements per row of the sparse ELL matrix.\n  @param[in]\n  ell_val     array that contains the elements of the sparse ELL matrix. Padded\n              elements should be zero.\n  @param[in]\n  ell_col_ind array that contains the column indices of the sparse ELL matrix.\n              Padded column indices should be -1.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_ell(), rocsparse_dcheck_matrix_ell(),\n              rocsparse_ccheck_matrix_ell() and rocsparse_zcheck_matrix_ell().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p ell_width is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p ell_val, \\p 
ell_col_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_ell_buffer_size(
+    pub fn rocsparse_zcsr2csc(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_val: *const rocsparse_double_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        csc_val: *mut rocsparse_double_complex,
+        csc_row_ind: *mut rocsparse_int,
+        csc_col_ptr: *mut rocsparse_int,
+        copy_values: rocsparse_action,
+        idx_base: rocsparse_index_base,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a compressed sparse CSR matrix\n\n  \\details\n  \\p rocsparse_csr2csr_compress converts a CSR matrix into a compressed CSR matrix by\n  removing entries in the input CSR matrix that are below a non-negative threshold \\p tol\n\n  \\note\n  In the case of complex matrices only the magnitude of the real part of \\p tol is used.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows of the sparse CSR matrix.\n  @param[in]\n  n             number of columns of the sparse CSR matrix.\n  @param[in]\n  descr_A       matrix descriptor for the CSR matrix\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                uncompressed sparse CSR matrix.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the uncompressed\n                sparse CSR matrix.\n  @param[in]\n  nnz_A         number of elements in the column indices and values arrays of the uncompressed\n                sparse CSR matrix.\n  @param[in]\n  nnz_per_row   array of length \\p m containing the number of entries that will be kept per row in\n                the final compressed CSR matrix.\n  @param[out]\n  csr_val_C     array of \\p nnz_C elements of the compressed sparse CSR matrix.\n  @param[out]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the compressed\n                sparse CSR matrix.\n  @param[out]\n  csr_col_ind_C array of \\p nnz_C elements containing the column indices of the compressed\n                sparse CSR matrix.\n  @param[in]\n  tol           the non-negative tolerance used for 
compression. If \\p tol is complex then only the magnitude\n                of the real part is used. Entries in the input uncompressed CSR array that are below the tolerance\n                are removed in output compressed CSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz_A is invalid.\n  \\retval     rocsparse_status_invalid_value \\p tol is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_val_A, \\p csr_row_ptr_A,\n              \\p csr_col_ind_A, \\p csr_val_C, \\p csr_row_ptr_C, \\p csr_col_ind_C or\n              \\p nnz_per_row pointer is invalid.\n\n  \\par Example\n  This example demonstrates how to compress a CSR matrix. Compressing a CSR matrix involves two steps. First we use\n  nnz_compress() to determine how many entries will be in the final compressed CSR matrix. 
Then we call csr2csr_compress()\n  to finish the compression and fill in the column indices and values arrays of the compressed CSR matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      float tol = 0.0f;\n\n      rocsparse_int m     = 3;\n      rocsparse_int n     = 5;\n      rocsparse_int nnz_A = 8;\n\n      csr_row_ptr_A[m+1]   = {0, 3, 5, 8};             // device memory\n      csr_col_ind_A[nnz_A] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      csr_val_A[nnz_A]     = {1, 0, 3, 4, 0, 6, 7, 0}; // device memory\n\n      // Allocate memory for the row pointer array of the compressed CSR matrix\n      rocsparse_int* csr_row_ptr_C;\n      hipMalloc((void**)&csr_row_ptr_C, sizeof(rocsparse_int) * (m + 1));\n\n      // Allocate memory for the nnz_per_row array\n      rocsparse_int* nnz_per_row;\n      hipMalloc((void**)&nnz_per_row, sizeof(rocsparse_int) * m);\n\n      // Call nnz_compress() which fills in nnz_per_row array and finds the number\n      // of entries that will be in the compressed CSR matrix\n      rocsparse_int nnz_C;\n      nnz_compress(handle,\n                   m,\n                   descr_A,\n                   csr_val_A,\n                   csr_row_ptr_A,\n                   nnz_per_row,\n                   &nnz_C,\n                   tol);\n\n      // Allocate column indices and values array for the compressed CSR matrix\n      rocsparse_int* csr_col_ind_C;\n      float* csr_val_C;\n      hipMalloc((void**)&csr_col_ind_C, sizeof(rocsparse_int) * nnz_C);\n      hipMalloc((void**)&csr_val_C, sizeof(float) * nnz_C);\n\n      // Finish compression by calling csr2csr_compress()\n      csr2csr_compress(handle,\n                       m,\n                       n,\n                       descr_A,\n                       csr_val_A,\n                       csr_row_ptr_A,\n                       csr_col_ind_A,\n                       nnz_A,\n                       nnz_per_row,\n                       csr_val_C,\n             
          csr_row_ptr_C,\n                       csr_col_ind_C,\n                       tol);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsr2csr_compress(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f32,
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        nnz_A: rocsparse_int,
+        nnz_per_row: *const rocsparse_int,
+        csr_val_C: *mut f32,
+        csr_row_ptr_C: *mut rocsparse_int,
+        csr_col_ind_C: *mut rocsparse_int,
+        tol: f32,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsr2csr_compress( // f64 counterpart of rocsparse_scsr2csr_compress; same parameter list
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f64, // values of the uncompressed matrix A (read-only)
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        nnz_A: rocsparse_int,
+        nnz_per_row: *const rocsparse_int, // read-only here; the documented example fills it with nnz_compress() first
+        csr_val_C: *mut f64, // output: values of the compressed matrix C
+        csr_row_ptr_C: *mut rocsparse_int, // output
+        csr_col_ind_C: *mut rocsparse_int, // output
+        tol: f64, // tolerance argument, same scalar type as the values
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_ccsr2csr_compress( // rocsparse_float_complex counterpart of rocsparse_scsr2csr_compress
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        csr_val_A: *const rocsparse_float_complex, // values of the uncompressed matrix A (read-only)
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        nnz_A: rocsparse_int,
+        nnz_per_row: *const rocsparse_int, // read-only here; the documented example fills it with nnz_compress() first
+        csr_val_C: *mut rocsparse_float_complex, // output: values of the compressed matrix C
+        csr_row_ptr_C: *mut rocsparse_int, // output
+        csr_col_ind_C: *mut rocsparse_int, // output
+        tol: rocsparse_float_complex, // NOTE(review): tolerance is passed as a complex value by value; how it is compared is not visible here
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_zcsr2csr_compress( // rocsparse_double_complex counterpart of rocsparse_scsr2csr_compress
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        csr_val_A: *const rocsparse_double_complex, // values of the uncompressed matrix A (read-only)
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        nnz_A: rocsparse_int,
+        nnz_per_row: *const rocsparse_int, // read-only here; the documented example fills it with nnz_compress() first
+        csr_val_C: *mut rocsparse_double_complex, // output: values of the compressed matrix C
+        csr_row_ptr_C: *mut rocsparse_int, // output
+        csr_col_ind_C: *mut rocsparse_int, // output
+        tol: rocsparse_double_complex, // NOTE(review): tolerance is passed as a complex value by value; how it is compared is not visible here
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the sparse matrix in CSR format into a dense matrix.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr       the descriptor of the dense matrix \\p A, the supported matrix type is \\ref rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  csr_val     array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[in]\n  csr_row_ptr integer array of m+1 elements that contains the start of every row and the end of the last row plus one.\n  @param[in]\n  csr_col_ind integer array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[out]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld          leading dimension of dense array \\p A.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p csr_val or \\p csr_row_ptr or \\p csr_col_ind\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scsr2dense( // f32 entry point of the CSR -> dense conversion described in the doc attribute above
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int, // m+1 entries per the doc attribute
+        csr_col_ind: *const rocsparse_int,
+        A: *mut f32, // output dense array, documented dimensions (ld, n)
+        ld: rocsparse_int, // leading dimension of A; passed by value
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsr2dense( // f64 counterpart of rocsparse_scsr2dense; same parameter list
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        A: *mut f64, // output dense array
+        ld: rocsparse_int, // leading dimension of A; passed by value
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_ccsr2dense( // rocsparse_float_complex counterpart of rocsparse_scsr2dense
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        A: *mut rocsparse_float_complex, // output dense array
+        ld: rocsparse_int, // leading dimension of A; passed by value
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_zcsr2dense( // rocsparse_double_complex counterpart of rocsparse_scsr2dense
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        A: *mut rocsparse_double_complex, // output dense array
+        ld: rocsparse_int, // leading dimension of A; passed by value
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse ELL matrix\n\n  \\details\n  \\p rocsparse_csr2ell_width computes the maximum of the per row non-zero elements\n  over all rows, the ELL \\p width, for a given CSR matrix.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  ell_descr   descriptor of the sparse ELL matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  ell_width   pointer to the number of non-zero elements per row in ELL storage\n              format.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_descr, \\p csr_row_ptr, or\n              \\p ell_width pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
+    pub fn rocsparse_csr2ell_width( // takes no value array, so there is a single entry point rather than s/d/c/z variants
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_row_ptr: *const rocsparse_int, // m+1 row offsets per the doc attribute (read-only)
+        ell_descr: rocsparse_mat_descr,
+        ell_width: *mut rocsparse_int, // output: written through this pointer
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse ELL matrix\n\n  \\details\n  \\p rocsparse_csr2ell converts a CSR matrix into an ELL matrix. It is assumed,\n  that \\p ell_val and \\p ell_col_ind are allocated. Allocation size is computed by the\n  number of rows times the number of ELL non-zero elements per row, such that\n  \\f$\\text{nnz}_{\\text{ELL}} = m \\cdot \\text{ell_width}\\f$. The number of ELL\n  non-zero elements per row is obtained by rocsparse_csr2ell_width().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val     array containing the values of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array containing the column indices of the sparse CSR matrix.\n  @param[in]\n  ell_descr   descriptor of the sparse ELL matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  ell_width   number of non-zero elements per row in ELL storage format.\n  @param[out]\n  ell_val     array of \\p m times \\p ell_width elements of the sparse ELL matrix.\n  @param[out]\n  ell_col_ind array of \\p m times \\p ell_width elements containing the column indices\n              of the sparse ELL matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p ell_width is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_descr, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p ell_descr, \\p ell_val or\n              \\p ell_col_ind pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example converts a CSR matrix into an ELL matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 5;\n      rocsparse_int nnz = 8;\n\n      csr_row_ptr[m+1] = {0, 3, 5, 8};             // device memory\n      csr_col_ind[nnz] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      csr_val[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8}; // device memory\n\n      // Create ELL matrix descriptor\n      rocsparse_mat_descr ell_descr;\n      rocsparse_create_mat_descr(&ell_descr);\n\n      // Obtain the ELL width\n      rocsparse_int ell_width;\n      rocsparse_csr2ell_width(handle,\n                              m,\n                              csr_descr,\n                              csr_row_ptr,\n                              ell_descr,\n                              &ell_width);\n\n      // Compute ELL non-zero entries\n      rocsparse_int ell_nnz = m * ell_width;\n\n      
// Allocate ELL column and value arrays\n      rocsparse_int* ell_col_ind;\n      hipMalloc((void**)&ell_col_ind, sizeof(rocsparse_int) * ell_nnz);\n\n      float* ell_val;\n      hipMalloc((void**)&ell_val, sizeof(float) * ell_nnz);\n\n      // Format conversion\n      rocsparse_scsr2ell(handle,\n                         m,\n                         csr_descr,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind,\n                         ell_descr,\n                         ell_width,\n                         ell_val,\n                         ell_col_ind);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsr2ell( // f32 entry point of the CSR -> ELL conversion described in the doc attribute above
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const f32, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        ell_descr: rocsparse_mat_descr,
+        ell_width: rocsparse_int, // by value; the documented example obtains it from rocsparse_csr2ell_width()
+        ell_val: *mut f32, // output; documented size is m * ell_width elements
+        ell_col_ind: *mut rocsparse_int, // output; documented size is m * ell_width elements
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsr2ell( // f64 counterpart of rocsparse_scsr2ell; same parameter list
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const f64, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        ell_descr: rocsparse_mat_descr,
+        ell_width: rocsparse_int, // by value
+        ell_val: *mut f64, // output ELL values
+        ell_col_ind: *mut rocsparse_int, // output ELL column indices
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_ccsr2ell( // rocsparse_float_complex counterpart of rocsparse_scsr2ell
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        ell_descr: rocsparse_mat_descr,
+        ell_width: rocsparse_int, // by value
+        ell_val: *mut rocsparse_float_complex, // output ELL values
+        ell_col_ind: *mut rocsparse_int, // output ELL column indices
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_zcsr2ell( // rocsparse_double_complex counterpart of rocsparse_scsr2ell
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        ell_descr: rocsparse_mat_descr,
+        ell_width: rocsparse_int, // by value
+        ell_val: *mut rocsparse_double_complex, // output ELL values
+        ell_col_ind: *mut rocsparse_int, // output ELL column indices
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  \\details\n  \\p rocsparse_csr2gebsr_buffer_size returns the size of the temporary buffer that\n  is required by \\p rocsparse_csr2gebcsr_nnz, \\p rocsparse_scsr2gebcsr, \\p rocsparse_dcsr2gebsr,\n  \\p rocsparse_ccsr2gebsr and \\p rocsparse_zcsr2gebsr. The temporary storage buffer must be\n  allocated by the user.\n\n  This function computes the number of nonzero block columns per row and the total number of nonzero blocks in a sparse\n  GEneral BSR matrix given a sparse CSR matrix as input.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         direction that specified whether to count nonzero elements by \\ref rocsparse_direction_row or by\n              \\ref rocsparse_direction_row.\n\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n\n  @param[in]\n  csr_descr    descriptor of the sparse CSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n\n  @param[in]\n  csr_val      array of \\p nnz elements containing the values of the sparse CSR matrix.\n\n  @param[in]\n  csr_row_ptr  integer array containing \\p m+1 elements that point to the start of each row of the CSR matrix\n\n  @param[in]\n  csr_col_ind  integer array of the column indices for each non-zero element in the CSR matrix\n\n  @param[in]\n  row_block_dim   the row block dimension of the GEneral BSR matrix. Between 1 and \\p m\n\n  @param[in]\n  col_block_dim   the col block dimension of the GEneral BSR matrix. 
Between 1 and \\p n\n\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer required by \\p rocsparse_csr2gebsr_nnz and \\p rocsparse_Xcsr2gebsr.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p row_block_dim  \\p col_block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_val or \\p csr_row_ptr or \\p csr_col_ind or \\p bsr_row_ptr or \\p buffer_size\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scsr2gebsr_buffer_size( // f32 entry point; reports the temporary buffer size for the csr2gebsr routines
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const f32, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        buffer_size: *mut usize, // output: documented as a number of bytes
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsr2gebsr_buffer_size( // f64 counterpart of rocsparse_scsr2gebsr_buffer_size
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const f64, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        buffer_size: *mut usize, // output: written through this pointer
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_ccsr2gebsr_buffer_size( // rocsparse_float_complex counterpart of rocsparse_scsr2gebsr_buffer_size
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        buffer_size: *mut usize, // output: written through this pointer
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_zcsr2gebsr_buffer_size( // rocsparse_double_complex counterpart of rocsparse_scsr2gebsr_buffer_size
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        buffer_size: *mut usize, // output: written through this pointer
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the number of nonzero block columns per row and the total number of nonzero blocks in a sparse\n  GEneral BSR matrix given a sparse CSR matrix as input.\n\n  \\details\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         direction that specified whether to count nonzero elements by \\ref rocsparse_direction_row or by\n              \\ref rocsparse_direction_row.\n\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n\n  @param[in]\n  csr_descr    descriptor of the sparse CSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_row_ptr integer array containing \\p m+1 elements that point to the start of each row of the CSR matrix\n\n  @param[in]\n  csr_col_ind integer array of the column indices for each non-zero element in the CSR matrix\n\n  @param[in]\n  bsr_descr    descriptor of the sparse GEneral BSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_row_ptr integer array containing \\p mb+1 elements that point to the start of each block row of the General BSR matrix\n\n  @param[in]\n  row_block_dim   the row block dimension of the GEneral BSR matrix. Between 1 and min(m, n)\n\n  @param[in]\n  col_block_dim   the col block dimension of the GEneral BSR matrix. 
Between 1 and min(m, n)\n\n  @param[out]\n  bsr_nnz_devhost  total number of nonzero elements in device or host memory.\n\n  @param[in]\n  temp_buffer    buffer allocated by the user whose size is determined by calling \\p rocsparse_xcsr2gebsr_buffer_size.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p row_block_dim \\p col_block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr or \\p csr_col_ind or \\p bsr_row_ptr or \\p bsr_nnz_devhost\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_csr2gebsr_nnz( // takes no value array, so there is a single entry point rather than s/d/c/z variants
+        handle: rocsparse_handle,
+        dir: rocsparse_direction, // NOTE(review): the doc attribute names rocsparse_direction_row twice; the second is presumably rocsparse_direction_column
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_row_ptr: *const rocsparse_int, // read-only, m+1 entries per the doc attribute
+        csr_col_ind: *const rocsparse_int, // read-only
+        bsr_descr: rocsparse_mat_descr, // NOTE: the csr2gebsr example in this file passes the block dims before bsr_descr; callers must follow the order declared here
+        bsr_row_ptr: *mut rocsparse_int, // output, mb+1 entries per the doc attribute
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsr_nnz_devhost: *mut rocsparse_int, // output; documented as living in device or host memory
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch, sized via the *csr2gebsr_buffer_size functions
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse GEneral BSR matrix\n\n  \\details\n  \\p rocsparse_csr2gebsr converts a CSR matrix into a GEneral BSR matrix. It is assumed,\n  that \\p bsr_val, \\p bsr_col_ind and \\p bsr_row_ptr are allocated. Allocation size\n  for \\p bsr_row_ptr is computed as \\p mb+1 where \\p mb is the number of block rows in\n  the GEneral BSR matrix. Allocation size for \\p bsr_val and \\p bsr_col_ind is computed using\n  \\p csr2gebsr_nnz() which also fills in \\p bsr_row_ptr.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  dir          the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n  @param[in]\n  m            number of rows in the sparse CSR matrix.\n  @param[in]\n  n            number of columns in the sparse CSR matrix.\n  @param[in]\n  csr_descr    descriptor of the sparse CSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val      array of \\p nnz elements containing the values of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr  array of \\p m+1 elements that point to the start of every row of the\n               sparse CSR matrix.\n  @param[in]\n  csr_col_ind  array of \\p nnz elements containing the column indices of the sparse CSR matrix.\n  @param[in]\n  bsr_descr    descriptor of the sparse BSR matrix. 
Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_val      array of \\p nnzb* \\p row_block_dim* \\p col_block_dim containing the values of the sparse BSR matrix.\n  @param[out]\n  bsr_row_ptr  array of \\p mb+1 elements that point to the start of every block row of the\n               sparse BSR matrix.\n  @param[out]\n  bsr_col_ind  array of \\p nnzb elements containing the block column indices of the sparse BSR matrix.\n  @param[in]\n  row_block_dim    row size of the blocks in the sparse GEneral BSR matrix.\n  @param[in]\n  col_block_dim    col size of the blocks in the sparse GEneral BSR matrix.\n  @param[in]\n  temp_buffer    buffer allocated by the user whose size is determined by calling \\p rocsparse_xcsr2gebsr_buffer_size.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p row_block_dim or \\p col_block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p csr_val, \\p csr_row_ptr or\n              \\p csr_col_ind pointer is invalid.\n\n  \\par Example\n  This example converts a CSR matrix into an BSR matrix.\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      rocsparse_int m   = 4;\n      rocsparse_int n   = 6;\n      rocsparse_int row_block_dim = 2;\n      rocsparse_int col_block_dim = 3;\n      rocsparse_int nnz = 9;\n      rocsparse_int mb = (m + row_block_dim - 1) / row_block_dim;\n      rocsparse_int nb = (n + col_block_dim - 1) / col_block_dim;\n\n      csr_row_ptr[m+1]  = {0, 2, 4, 7, 9};             // device memory\n      csr_col_ind[nnz]  = {0, 1, 1, 2, 0, 3, 4, 2, 4}; // device memory\n      csr_val[nnz]      = {1, 4, 2, 3, 5, 7, 8, 9, 6}; // device 
memory\n\n      hipMalloc(&bsr_row_ptr, sizeof(rocsparse_int) *(mb + 1));\n      rocsparse_int nnzb;\n      rocsparse_int* nnzTotalHostPtr = &nnzb;\n      csr2gebsr_nnz(handle,\n                  rocsparse_direction_row,\n                  m,\n                  n,\n                  csr_descr,\n                  csr_row_ptr,\n                  csr_col_ind,\n                  row_block_dim,\n                  col_block_dim,\n                  bsr_descr,\n                  bsr_row_ptr,\n                  nnzTotalHostPtr);\n      nnzb = *nnzTotalHostPtr;\n      hipMalloc(&bsr_col_ind, sizeof(int)*nnzb);\n      hipMalloc(&bsr_val, sizeof(float)*(row_block_dim * col_block_dim) * nnzb);\n      scsr2gebsr(handle,\n               rocsparse_direction_row,\n               m,\n               n,\n               csr_descr,\n               csr_val,\n               csr_row_ptr,\n               csr_col_ind,\n               row_block_dim,\n               col_block_dim,\n               bsr_descr,\n               bsr_val,\n               bsr_row_ptr,\n               bsr_col_ind);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsr2gebsr( // f32 entry point of the CSR -> general BSR conversion described in the doc attribute above
+        handle: rocsparse_handle,
+        dir: rocsparse_direction, // storage order of the blocks (row or column) per the doc attribute
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const f32, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        bsr_descr: rocsparse_mat_descr, // NOTE: the documented example passes the block dims before bsr_descr and omits temp_buffer; callers must follow the order declared here
+        bsr_val: *mut f32, // output; documented size is nnzb * row_block_dim * col_block_dim
+        bsr_row_ptr: *mut rocsparse_int, // declared *mut; the doc states it is filled in by the csr2gebsr_nnz step
+        bsr_col_ind: *mut rocsparse_int, // output; documented size is nnzb
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch, sized via rocsparse_scsr2gebsr_buffer_size
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsr2gebsr( // f64 counterpart of rocsparse_scsr2gebsr; same parameter list
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const f64, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut f64, // output BSR values
+        bsr_row_ptr: *mut rocsparse_int,
+        bsr_col_ind: *mut rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch, sized via rocsparse_dcsr2gebsr_buffer_size
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_ccsr2gebsr( // rocsparse_float_complex counterpart of rocsparse_scsr2gebsr
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut rocsparse_float_complex, // output BSR values
+        bsr_row_ptr: *mut rocsparse_int,
+        bsr_col_ind: *mut rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch, sized via rocsparse_ccsr2gebsr_buffer_size
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_zcsr2gebsr( // rocsparse_double_complex counterpart of rocsparse_scsr2gebsr
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        bsr_descr: rocsparse_mat_descr,
+        bsr_val: *mut rocsparse_double_complex, // output BSR values
+        bsr_row_ptr: *mut rocsparse_int,
+        bsr_col_ind: *mut rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch, sized via rocsparse_zcsr2gebsr_buffer_size
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse HYB matrix\n\n  \\details\n  \\p rocsparse_csr2hyb converts a CSR matrix into a HYB matrix. It is assumed\n  that \\p hyb has been initialized with rocsparse_create_hyb_mat().\n\n  \\note\n  This function requires a significant amount of storage for the HYB matrix,\n  depending on the matrix structure.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSR matrix.\n  @param[in]\n  n               number of columns of the sparse CSR matrix.\n  @param[in]\n  descr           descriptor of the sparse CSR matrix. Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val         array containing the values of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr     array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix.\n  @param[in]\n  csr_col_ind     array containing the column indices of the sparse CSR matrix.\n  @param[out]\n  hyb             sparse matrix in HYB format.\n  @param[in]\n  user_ell_width  width of the ELL part of the HYB matrix (only required if\n                  \\p partition_type == \\ref rocsparse_hyb_partition_user).\n  @param[in]\n  partition_type  \\ref rocsparse_hyb_partition_auto (recommended),\n                  \\ref rocsparse_hyb_partition_user or\n                  \\ref rocsparse_hyb_partition_max.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p user_ell_width is invalid.\n  \\retval     rocsparse_status_invalid_value \\p 
partition_type is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p hyb, \\p csr_val,\n              \\p csr_row_ptr or \\p csr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer for the HYB matrix could not be\n              allocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example converts a CSR matrix into a HYB matrix using user defined partitioning.\n  \\code{.c}\n      // Create HYB matrix structure\n      rocsparse_hyb_mat hyb;\n      rocsparse_create_hyb_mat(&hyb);\n\n      // User defined ell width\n      rocsparse_int user_ell_width = 5;\n\n      // Perform the conversion\n      rocsparse_scsr2hyb(handle,\n                         m,\n                         n,\n                         descr,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind,\n                         hyb,\n                         user_ell_width,\n                         rocsparse_hyb_partition_user);\n\n      // Do some work\n\n      // Clean up\n      rocsparse_destroy_hyb_mat(hyb);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsr2hyb( // f32 entry point of the CSR -> HYB conversion described in the doc attribute above
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        hyb: rocsparse_hyb_mat, // output handle; the doc requires it to be initialised with rocsparse_create_hyb_mat() beforehand
+        user_ell_width: rocsparse_int, // documented as only required when partition_type is rocsparse_hyb_partition_user
+        partition_type: rocsparse_hyb_partition,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsr2hyb( // f64 counterpart of rocsparse_scsr2hyb; same parameter list
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64, // CSR values (read-only)
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        hyb: rocsparse_hyb_mat, // HYB matrix handle that receives the result
+        user_ell_width: rocsparse_int,
+        partition_type: rocsparse_hyb_partition,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccsr2hyb(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        hyb: rocsparse_hyb_mat,
+        user_ell_width: rocsparse_int,
+        partition_type: rocsparse_hyb_partition,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcsr2hyb(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        hyb: rocsparse_hyb_mat,
+        user_ell_width: rocsparse_int,
+        partition_type: rocsparse_hyb_partition,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_csrsort_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_csrsort(). The temporary storage buffer must be allocated by\n  the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSR matrix.\n  @param[in]\n  n               number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr     array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix.\n  @param[in]\n  csr_col_ind     array of \\p nnz elements containing the column indices of the sparse\n                  CSR matrix.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_csrsort().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr, \\p csr_col_ind or\n              \\p buffer_size pointer is invalid."]
+    pub fn rocsparse_csrsort_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_csrsort sorts a matrix in CSR format. The sorted permutation vector\n  \\p perm can be used to obtain sorted \\p csr_val array. In this case, \\p perm must be\n  initialized as the identity permutation, see rocsparse_create_identity_permutation().\n\n  \\p rocsparse_csrsort requires extra temporary storage buffer that has to be allocated by\n  the user. Storage buffer size can be determined by rocsparse_csrsort_buffer_size().\n\n  \\note\n  \\p perm can be \\p NULL if a sorted permutation vector is not required.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSR matrix.\n  @param[in]\n  n               number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr           descriptor of the sparse CSR matrix. 
Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_row_ptr     array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix.\n  @param[inout]\n  csr_col_ind     array of \\p nnz elements containing the column indices of the sparse\n                  CSR matrix.\n  @param[inout]\n  perm            array of \\p nnz integers containing the unsorted map indices, can be\n                  \\p NULL.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned by\n                  rocsparse_csrsort_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_row_ptr, \\p csr_col_ind\n              or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  The following example sorts a \\f$3 \\times 3\\f$ CSR matrix.\n  \\code{.c}\n      //     1 2 3\n      // A = 4 5 6\n      //     7 8 9\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 3;\n      rocsparse_int nnz = 9;\n\n      csr_row_ptr[m + 1] = {0, 3, 6, 9};                // device memory\n      csr_col_ind[nnz]   = {2, 0, 1, 0, 1, 2, 0, 2, 1}; // device memory\n      csr_val[nnz]       = {3, 1, 2, 4, 5, 6, 7, 9, 8}; // device memory\n\n      // Create permutation vector perm as the identity map\n      rocsparse_int* perm;\n      hipMalloc((void**)&perm, sizeof(rocsparse_int) * nnz);\n      rocsparse_create_identity_permutation(handle, nnz, perm);\n\n      // Allocate temporary buffer\n      size_t 
buffer_size;\n      void* temp_buffer;\n      rocsparse_csrsort_buffer_size(handle, m, n, nnz, csr_row_ptr, csr_col_ind, &buffer_size);\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Sort the CSR matrix\n      rocsparse_csrsort(handle, m, n, nnz, descr, csr_row_ptr, csr_col_ind, perm, temp_buffer);\n\n      // Gather sorted csr_val array\n      float* csr_val_sorted;\n      hipMalloc((void**)&csr_val_sorted, sizeof(float) * nnz);\n      rocsparse_sgthr(handle, nnz, csr_val, csr_val_sorted, perm, rocsparse_index_base_zero);\n\n      // Clean up\n      hipFree(temp_buffer);\n      hipFree(perm);\n      hipFree(csr_val);\n  \\endcode"]
+    pub fn rocsparse_csrsort(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+        perm: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n\n  This function converts the matrix A in dense format into a sparse matrix in COO format.\n  All the parameters are assumed to have been pre-allocated by the user and the arrays are\n  filled in based on nnz_per_rows, which can be pre-computed with rocsparse_xnnz().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld         leading dimension of dense array \\p A.\n\n  @param[in]\n  nnz_per_rows   array of size \\p m containing the number of non-zero elements per row.\n\n  @param[out]\n  coo_val\n              array of nnz nonzero elements of matrix \\p A.\n  @param[out]\n  coo_row_ind\n              integer array of nnz row indices of the non-zero elements of matrix \\p A.\n  @param[out]\n  coo_col_ind integer array of nnz column indices of the non-zero elements of matrix \\p A.\n\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p nnz_per_rows or \\p coo_val or \\p coo_col_ind or \\p coo_row_ind\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sdense2coo(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const f32,
+        ld: rocsparse_int,
+        nnz_per_rows: *const rocsparse_int,
+        coo_val: *mut f32,
+        coo_row_ind: *mut rocsparse_int,
+        coo_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ddense2coo(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const f64,
+        ld: rocsparse_int,
+        nnz_per_rows: *const rocsparse_int,
+        coo_val: *mut f64,
+        coo_row_ind: *mut rocsparse_int,
+        coo_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cdense2coo(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const rocsparse_float_complex,
+        ld: rocsparse_int,
+        nnz_per_rows: *const rocsparse_int,
+        coo_val: *mut rocsparse_float_complex,
+        coo_row_ind: *mut rocsparse_int,
+        coo_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zdense2coo(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const rocsparse_double_complex,
+        ld: rocsparse_int,
+        nnz_per_rows: *const rocsparse_int,
+        coo_val: *mut rocsparse_double_complex,
+        coo_row_ind: *mut rocsparse_int,
+        coo_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n\n  This function converts the matrix A in dense format into a sparse matrix in CSC format.\n  All the parameters are assumed to have been pre-allocated by the user and the arrays are\n  filled in based on nnz_per_columns, which can be pre-computed with rocsparse_xnnz().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld         leading dimension of dense array \\p A.\n\n  @param[in]\n  nnz_per_columns   array of size \\p n containing the number of non-zero elements per column.\n\n  @param[out]\n  csc_val\n              array of nnz ( = \\p csc_col_ptr[n] - \\p csc_col_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[out]\n  csc_col_ptr\n              integer array of n+1 elements that contains the start of every column and the end of the last column plus one.\n  @param[out]\n  csc_row_ind\n              integer array of nnz ( = \\p csc_col_ptr[n] - csc_col_ptr[0] ) row indices of the non-zero elements of matrix \\p A.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p nnz_per_columns or \\p csc_val or \\p csc_col_ptr or \\p csc_row_ind\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sdense2csc(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const f32,
+        ld: rocsparse_int,
+        nnz_per_columns: *const rocsparse_int,
+        csc_val: *mut f32,
+        csc_col_ptr: *mut rocsparse_int,
+        csc_row_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ddense2csc(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const f64,
+        ld: rocsparse_int,
+        nnz_per_columns: *const rocsparse_int,
+        csc_val: *mut f64,
+        csc_col_ptr: *mut rocsparse_int,
+        csc_row_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cdense2csc(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const rocsparse_float_complex,
+        ld: rocsparse_int,
+        nnz_per_columns: *const rocsparse_int,
+        csc_val: *mut rocsparse_float_complex,
+        csc_col_ptr: *mut rocsparse_int,
+        csc_row_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zdense2csc(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const rocsparse_double_complex,
+        ld: rocsparse_int,
+        nnz_per_columns: *const rocsparse_int,
+        csc_val: *mut rocsparse_double_complex,
+        csc_col_ptr: *mut rocsparse_int,
+        csc_row_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the matrix A in dense format into a sparse matrix in CSR format.\n  All the parameters are assumed to have been pre-allocated by the user and the arrays are filled in based on nnz_per_rows, which can be pre-computed with rocsparse_xnnz().\n\n  \\note\n  This function is blocking with respect to the host.\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld         leading dimension of dense array \\p A.\n\n  @param[in]\n  nnz_per_rows   array of size \\p m containing the number of non-zero elements per row.\n\n  @param[out]\n  csr_val\n              array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[out]\n  csr_row_ptr\n              integer array of m+1 elements that contains the start of every row and the end of the last row plus one.\n  @param[out]\n  csr_col_ind\n              integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p nnz_per_rows or \\p csr_val or \\p csr_row_ptr or \\p csr_col_ind\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sdense2csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const f32,
+        ld: rocsparse_int,
+        nnz_per_rows: *const rocsparse_int,
+        csr_val: *mut f32,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ddense2csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const f64,
+        ld: rocsparse_int,
+        nnz_per_rows: *const rocsparse_int,
+        csr_val: *mut f64,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cdense2csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const rocsparse_float_complex,
+        ld: rocsparse_int,
+        nnz_per_rows: *const rocsparse_int,
+        csr_val: *mut rocsparse_float_complex,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zdense2csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const rocsparse_double_complex,
+        ld: rocsparse_int,
+        nnz_per_rows: *const rocsparse_int,
+        csr_val: *mut rocsparse_double_complex,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse ELL matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_ell2csr_nnz computes the total CSR non-zero elements and the CSR\n  row offsets, that point to the start of every row of the sparse CSR matrix, for\n  a given ELL matrix. It is assumed that \\p csr_row_ptr has been allocated with\n  size \\p m+1.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse ELL matrix.\n  @param[in]\n  n           number of columns of the sparse ELL matrix.\n  @param[in]\n  ell_descr   descriptor of the sparse ELL matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  ell_width   number of non-zero elements per row in ELL storage format.\n  @param[in]\n  ell_col_ind array of \\p m times \\p ell_width elements containing the column indices\n              of the sparse ELL matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[out]\n  csr_nnz     pointer to the total number of non-zero elements in CSR storage\n              format.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ell_width is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p ell_descr, \\p ell_col_ind,\n              \\p csr_descr, \\p csr_row_ptr or \\p csr_nnz pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
+    pub fn rocsparse_ell2csr_nnz(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        ell_descr: rocsparse_mat_descr,
+        ell_width: rocsparse_int,
+        ell_col_ind: *const rocsparse_int,
+        csr_descr: rocsparse_mat_descr,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_nnz: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse ELL matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_ell2csr converts an ELL matrix into a CSR matrix. It is assumed\n  that \\p csr_row_ptr has already been filled and that \\p csr_val and \\p csr_col_ind\n  are allocated by the user. \\p csr_row_ptr and allocation size of \\p csr_col_ind and\n  \\p csr_val is defined by the number of CSR non-zero elements. Both can be obtained\n  by rocsparse_ell2csr_nnz().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse ELL matrix.\n  @param[in]\n  n           number of columns of the sparse ELL matrix.\n  @param[in]\n  ell_descr   descriptor of the sparse ELL matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  ell_width   number of non-zero elements per row in ELL storage format.\n  @param[in]\n  ell_val     array of \\p m times \\p ell_width elements of the sparse ELL matrix.\n  @param[in]\n  ell_col_ind array of \\p m times \\p ell_width elements containing the column indices\n              of the sparse ELL matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val     array containing the values of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[out]\n  csr_col_ind array containing the column indices of the sparse CSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ell_width is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_descr, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p ell_descr, \\p ell_val or\n              \\p ell_col_ind pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example converts an ELL matrix into a CSR matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m         = 3;\n      rocsparse_int n         = 5;\n      rocsparse_int nnz       = 9;\n      rocsparse_int ell_width = 3;\n\n      ell_col_ind[nnz] = {0, 1, 0, 1, 2, 3, 3, -1, 4}; // device memory\n      ell_val[nnz]     = {1, 4, 6, 2, 5, 7, 3, 0, 8};  // device memory\n\n      // Create CSR matrix descriptor\n      rocsparse_mat_descr csr_descr;\n      rocsparse_create_mat_descr(&csr_descr);\n\n      // Allocate csr_row_ptr array for row offsets\n      rocsparse_int* csr_row_ptr;\n      hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n\n      // Obtain the number of CSR non-zero entries\n      // and fill csr_row_ptr array with row offsets\n      rocsparse_int csr_nnz;\n      rocsparse_ell2csr_nnz(handle,\n                            m,\n                            n,\n                           
 ell_descr,\n                            ell_width,\n                            ell_col_ind,\n                            csr_descr,\n                            csr_row_ptr,\n                            &csr_nnz);\n\n      // Allocate CSR column and value arrays\n      rocsparse_int* csr_col_ind;\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * csr_nnz);\n\n      float* csr_val;\n      hipMalloc((void**)&csr_val, sizeof(float) * csr_nnz);\n\n      // Format conversion\n      rocsparse_sell2csr(handle,\n                         m,\n                         n,\n                         ell_descr,\n                         ell_width,\n                         ell_val,\n                         ell_col_ind,\n                         csr_descr,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_sell2csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        ell_descr: rocsparse_mat_descr,
         ell_width: rocsparse_int,
         ell_val: *const f32,
         ell_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dcheck_matrix_ell_buffer_size(
+    pub fn rocsparse_dell2csr(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
+        ell_descr: rocsparse_mat_descr,
         ell_width: rocsparse_int,
         ell_val: *const f64,
         ell_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut f64,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_ccheck_matrix_ell_buffer_size(
+    pub fn rocsparse_cell2csr(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
+        ell_descr: rocsparse_mat_descr,
         ell_width: rocsparse_int,
         ell_val: *const rocsparse_float_complex,
         ell_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut rocsparse_float_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_zcheck_matrix_ell_buffer_size(
+    pub fn rocsparse_zell2csr(
         handle: rocsparse_handle,
         m: rocsparse_int,
         n: rocsparse_int,
+        ell_descr: rocsparse_mat_descr,
         ell_width: rocsparse_int,
         ell_val: *const rocsparse_double_complex,
         ell_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut rocsparse_double_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_ell checks if the input ELL matrix is valid.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse ELL matrix.\n  @param[in]\n  n           number of columns of the sparse ELL matrix.\n  @param[in]\n  ell_width   number of non-zero elements per row of the sparse ELL matrix.\n  @param[in]\n  ell_val     array that contains the elements of the sparse ELL matrix. Padded\n              elements should be zero.\n  @param[in]\n  ell_col_ind array that contains the column indices of the sparse ELL matrix.\n              Padded column indices should be -1.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p ell_width is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p ell_val, \\p ell_col_ind, \\p temp_buffer or \\p data_status pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scheck_matrix_ell(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        ell_width: rocsparse_int,
-        ell_val: *const f32,
-        ell_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcheck_matrix_ell(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        ell_width: rocsparse_int,
-        ell_val: *const f64,
-        ell_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccheck_matrix_ell(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        ell_width: rocsparse_int,
-        ell_val: *const rocsparse_float_complex,
-        ell_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcheck_matrix_ell(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        ell_width: rocsparse_int,
-        ell_val: *const rocsparse_double_complex,
-        ell_col_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_hyb_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_hyb\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  hyb         matrix in HYB storage format.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_check_matrix_hyb().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p hyb or \\p buffer_size pointer is invalid."]
-    pub fn rocsparse_check_matrix_hyb_buffer_size(
-        handle: rocsparse_handle,
-        hyb: rocsparse_hyb_mat,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_hyb checks if the input HYB matrix is valid.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  hyb         matrix in HYB storage format.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_sorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p hyb or \\p data_status pointer is invalid."]
-    pub fn rocsparse_check_matrix_hyb(
-        handle: rocsparse_handle,
-        hyb: rocsparse_hyb_mat,
-        idx_base: rocsparse_index_base,
-        matrix_type: rocsparse_matrix_type,
-        uplo: rocsparse_fill_mode,
-        storage: rocsparse_storage_mode,
-        data_status: *mut rocsparse_data_status,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level1_module\n  \\brief Scale a sparse vector and add it to a dense vector.\n\n  \\details\n  \\p rocsparse_axpyi multiplies the sparse vector \\f$x\\f$ with scalar \\f$\\alpha\\f$ and\n  adds the result to the dense vector \\f$y\\f$, such that\n\n  \\f[\n      y := y + \\alpha \\cdot x\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          y[x_ind[i]] = y[x_ind[i]] + alpha * x_val[i];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of vector \\f$x\\f$.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  x_val       array of \\p nnz elements containing the values of \\f$x\\f$.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[inout]\n  y           array of values in dense format.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p alpha, \\p x_val, \\p x_ind or \\p y pointer\n          is invalid.\n/\n/**@{"]
-    pub fn rocsparse_saxpyi(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        alpha: *const f32,
-        x_val: *const f32,
-        x_ind: *const rocsparse_int,
-        y: *mut f32,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_daxpyi(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        alpha: *const f64,
-        x_val: *const f64,
-        x_ind: *const rocsparse_int,
-        y: *mut f64,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_caxpyi(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        x_val: *const rocsparse_float_complex,
-        x_ind: *const rocsparse_int,
-        y: *mut rocsparse_float_complex,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zaxpyi(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        x_val: *const rocsparse_double_complex,
-        x_ind: *const rocsparse_int,
-        y: *mut rocsparse_double_complex,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level1_module\n  \\brief Compute the dot product of a sparse vector with a dense vector.\n\n  \\details\n  \\p rocsparse_doti computes the dot product of the sparse vector \\f$x\\f$ with the\n  dense vector \\f$y\\f$, such that\n  \\f[\n    \\text{result} := y^T x\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          result += x_val[i] * y[x_ind[i]];\n      }\n  \\endcode\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of vector \\f$x\\f$.\n  @param[in]\n  x_val       array of \\p nnz values.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[in]\n  y           array of values in dense format.\n  @param[out]\n  result      pointer to the result, can be host or device memory\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p x_val, \\p x_ind, \\p y or \\p result\n          pointer is invalid.\n  \\retval rocsparse_status_memory_error the buffer for the dot product reduction\n          could not be allocated.\n  \\retval rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
-    pub fn rocsparse_sdoti(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const f32,
-        x_ind: *const rocsparse_int,
-        y: *const f32,
-        result: *mut f32,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ddoti(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const f64,
-        x_ind: *const rocsparse_int,
-        y: *const f64,
-        result: *mut f64,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cdoti(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const rocsparse_float_complex,
-        x_ind: *const rocsparse_int,
-        y: *const rocsparse_float_complex,
-        result: *mut rocsparse_float_complex,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zdoti(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const rocsparse_double_complex,
-        x_ind: *const rocsparse_int,
-        y: *const rocsparse_double_complex,
-        result: *mut rocsparse_double_complex,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level1_module\n  \\brief Compute the dot product of a complex conjugate sparse vector with a dense\n  vector.\n\n  \\details\n  \\p rocsparse_dotci computes the dot product of the complex conjugate sparse vector\n  \\f$x\\f$ with the dense vector \\f$y\\f$, such that\n  \\f[\n    \\text{result} := \\bar{x}^H y\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          result += conj(x_val[i]) * y[x_ind[i]];\n      }\n  \\endcode\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of vector \\f$x\\f$.\n  @param[in]\n  x_val       array of \\p nnz values.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[in]\n  y           array of values in dense format.\n  @param[out]\n  result      pointer to the result, can be host or device memory\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p x_val, \\p x_ind, \\p y or \\p result\n          pointer is invalid.\n  \\retval rocsparse_status_memory_error the buffer for the dot product reduction\n          could not be allocated.\n  \\retval rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
-    pub fn rocsparse_cdotci(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const rocsparse_float_complex,
-        x_ind: *const rocsparse_int,
-        y: *const rocsparse_float_complex,
-        result: *mut rocsparse_float_complex,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zdotci(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const rocsparse_double_complex,
-        x_ind: *const rocsparse_int,
-        y: *const rocsparse_double_complex,
-        result: *mut rocsparse_double_complex,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level1_module\n  \\brief Gather elements from a dense vector and store them into a sparse vector.\n\n  \\details\n  \\p rocsparse_gthr gathers the elements that are listed in \\p x_ind from the dense\n  vector \\f$y\\f$ and stores them in the sparse vector \\f$x\\f$.\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          x_val[i] = y[x_ind[i]];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of \\f$x\\f$.\n  @param[in]\n  y           array of values in dense format.\n  @param[out]\n  x_val       array of \\p nnz elements containing the values of \\f$x\\f$.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval     rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p y, \\p x_val or \\p x_ind pointer is\n              invalid.\n/\n/**@{"]
-    pub fn rocsparse_sgthr(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        y: *const f32,
-        x_val: *mut f32,
-        x_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dgthr(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        y: *const f64,
-        x_val: *mut f64,
-        x_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgthr(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        y: *const rocsparse_float_complex,
-        x_val: *mut rocsparse_float_complex,
-        x_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgthr(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        y: *const rocsparse_double_complex,
-        x_val: *mut rocsparse_double_complex,
-        x_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level1_module\n  \\brief Gather and zero out elements from a dense vector and store them into a sparse\n  vector.\n\n  \\details\n  \\p rocsparse_gthrz gathers the elements that are listed in \\p x_ind from the dense\n  vector \\f$y\\f$ and stores them in the sparse vector \\f$x\\f$. The gathered elements\n  in \\f$y\\f$ are replaced by zero.\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          x_val[i]    = y[x_ind[i]];\n          y[x_ind[i]] = 0;\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of \\f$x\\f$.\n  @param[inout]\n  y           array of values in dense format.\n  @param[out]\n  x_val       array of \\p nnz elements containing the non-zero values of \\f$x\\f$.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval     rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p y, \\p x_val or \\p x_ind pointer is\n              invalid.\n/\n/**@{"]
-    pub fn rocsparse_sgthrz(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        y: *mut f32,
-        x_val: *mut f32,
-        x_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dgthrz(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        y: *mut f64,
-        x_val: *mut f64,
-        x_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgthrz(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        y: *mut rocsparse_float_complex,
-        x_val: *mut rocsparse_float_complex,
-        x_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgthrz(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        y: *mut rocsparse_double_complex,
-        x_val: *mut rocsparse_double_complex,
-        x_ind: *const rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level1_module\n  \\brief Apply Givens rotation to a dense and a sparse vector.\n\n  \\details\n  \\p rocsparse_roti applies the Givens rotation matrix \\f$G\\f$ to the sparse vector\n  \\f$x\\f$ and the dense vector \\f$y\\f$, where\n  \\f[\n    G = \\begin{pmatrix} c & s \\\\ -s & c \\end{pmatrix}\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          x_tmp = x_val[i];\n          y_tmp = y[x_ind[i]];\n\n          x_val[i]    = c * x_tmp + s * y_tmp;\n          y[x_ind[i]] = c * y_tmp - s * x_tmp;\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of \\f$x\\f$.\n  @param[inout]\n  x_val       array of \\p nnz elements containing the non-zero values of \\f$x\\f$.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[inout]\n  y           array of values in dense format.\n  @param[in]\n  c           pointer to the cosine element of \\f$G\\f$, can be on host or device.\n  @param[in]\n  s           pointer to the sine element of \\f$G\\f$, can be on host or device.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval     rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p c, \\p s, \\p x_val, \\p x_ind or \\p y\n              pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sroti(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *mut f32,
-        x_ind: *const rocsparse_int,
-        y: *mut f32,
-        c: *const f32,
-        s: *const f32,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_droti(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *mut f64,
-        x_ind: *const rocsparse_int,
-        y: *mut f64,
-        c: *const f64,
-        s: *const f64,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level1_module\n  \\brief Scatter elements from a dense vector across a sparse vector.\n\n  \\details\n  \\p rocsparse_sctr scatters the elements that are listed in \\p x_ind from the sparse\n  vector \\f$x\\f$ into the dense vector \\f$y\\f$. Indices of \\f$y\\f$ that are not listed\n  in \\p x_ind remain unchanged.\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          y[x_ind[i]] = x_val[i];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of \\f$x\\f$.\n  @param[in]\n  x_val       array of \\p nnz elements containing the non-zero values of \\f$x\\f$.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of x.\n  @param[inout]\n  y           array of values in dense format.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval     rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p x_val, \\p x_ind or \\p y pointer is\n              invalid.\n/\n/**@{"]
-    pub fn rocsparse_ssctr(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const f32,
-        x_ind: *const rocsparse_int,
-        y: *mut f32,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dsctr(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const f64,
-        x_ind: *const rocsparse_int,
-        y: *mut f64,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_csctr(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const rocsparse_float_complex,
-        x_ind: *const rocsparse_int,
-        y: *mut rocsparse_float_complex,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zsctr(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const rocsparse_double_complex,
-        x_ind: *const rocsparse_int,
-        y: *mut rocsparse_double_complex,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_isctr(
-        handle: rocsparse_handle,
-        nnz: rocsparse_int,
-        x_val: *const rocsparse_int,
-        x_ind: *const rocsparse_int,
-        y: *mut rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmv_ex_analysis performs the analysis step for rocsparse_sbsrmv(),\n  rocsparse_dbsrmv(), rocsparse_cbsrmv() and rocsparse_zbsrmv(). It is expected that\n  this function will be executed only once for a given matrix and particular operation\n  type. The gathered analysis meta data can be cleared by rocsparse_bsrmv_ex_clear().\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nb          number of block columns of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb or \\p nnzb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val, \\p bsr_row_ptr,\n              \\p bsr_col_ind or \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer for the gathered information\n              could not be allocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sbsrmv_ex_analysis(
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse general BSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_gebsr2csr converts a BSR matrix into a CSR matrix. It is assumed,\n  that \\p csr_val, \\p csr_col_ind and \\p csr_row_ptr are allocated. Allocation size\n  for \\p csr_row_ptr is computed by the number of block rows multiplied by the block\n  dimension plus one. Allocation for \\p csr_val and \\p csr_col_ind is computed by the\n  the number of blocks in the BSR matrix multiplied by the product of the block dimensions.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n  @param[in]\n  mb          number of block rows in the sparse general BSR matrix.\n  @param[in]\n  nb          number of block columns in the sparse general BSR matrix.\n  @param[in]\n  bsr_descr   descriptor of the sparse general BSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb*row_block_dim*col_block_dim containing the values of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse BSR matrix.\n  @param[in]\n  row_block_dim   row size of the blocks in the sparse general BSR matrix.\n  @param[in]\n  col_block_dim   column size of the blocks in the sparse general BSR matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val     array of \\p nnzb*row_block_dim*col_block_dim elements containing the values of the sparse CSR matrix.\n  @param[out]\n  csr_row_ptr array of \\p m+1 where \\p m=mb*row_block_dim elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[out]\n  csr_col_ind array of \\p nnzb*block_dim*block_dim elements containing the column indices of the sparse CSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb or \\p nb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p csr_val, \\p csr_row_ptr or\n              \\p csr_col_ind pointer is invalid.\n\n  \\par Example\n  This example converts a general BSR matrix into an CSR matrix.\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      rocsparse_int mb   = 2;\n      rocsparse_int nb   = 2;\n      rocsparse_int row_block_dim = 2;\n      rocsparse_int col_block_dim = 3;\n      rocsparse_int m = Mb * row_block_dim;\n      rocsparse_int n = Nb * col_block_dim;\n\n      bsr_row_ptr[mb+1]                 = {0, 1, 3};                                              // device memory\n      bsr_col_ind[nnzb]                 = {0, 0, 1};                                              // device memory\n      bsr_val[nnzb*block_dim*block_dim] = {1, 0, 4, 2, 0, 3, 5, 0, 0, 0, 0, 9, 7, 0, 8, 6, 0, 0}; // device memory\n\n      rocsparse_int nnzb = bsr_row_ptr[mb] - bsr_row_ptr[0];\n\n      // Create CSR arrays on device\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n      hipMalloc((void**)&csr_row_ptr, 
sizeof(rocsparse_int) * (m + 1));\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnzb * row_block_dim * col_block_dim);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnzb * row_block_dim * col_block_dim);\n\n      // Create rocsparse handle\n      rocsparse_local_handle handle;\n\n      rocsparse_mat_descr bsr_descr = nullptr;\n      rocsparse_create_mat_descr(&bsr_descr);\n\n      rocsparse_mat_descr csr_descr = nullptr;\n      rocsparse_create_mat_descr(&csr_descr);\n\n      rocsparse_set_mat_index_base(bsr_descr, rocsparse_index_base_zero);\n      rocsparse_set_mat_index_base(csr_descr, rocsparse_index_base_zero);\n\n      // Format conversion\n      rocsparse_sgebsr2csr(handle,\n                         rocsparse_direction_column,\n                         mb,\n                         nb,\n                         bsr_descr,\n                         bsr_val,\n                         bsr_row_ptr,\n                         bsr_col_ind,\n                         row_block_dim,\n                         col_block_dim,\n                         csr_descr,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_sgebsr2csr(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
-        trans: rocsparse_operation,
         mb: rocsparse_int,
         nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrmv_ex_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrmv_ex_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrmv_ex_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmv_ex multiplies the scalar \\f$\\alpha\\f$ with a sparse\n  \\f$(mb \\cdot \\text{block_dim}) \\times (nb \\cdot \\text{block_dim})\\f$\n  matrix, defined in BSR storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none is supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nb          number of block columns of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[in]\n  x           array of \\p nb*block_dim elements (\\f$op(A) = A\\f$) or \\p mb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p mb*block_dim elements (\\f$op(A) = A\\f$) or \\p nb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb or \\p block_dim is\n              invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ind, \\p bsr_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sbsrmv_ex(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const f32,
-        beta: *const f32,
-        y: *mut f32,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrmv_ex(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const f64,
-        beta: *const f64,
-        y: *mut f64,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrmv_ex(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const rocsparse_float_complex,
-        beta: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrmv_ex(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const rocsparse_double_complex,
-        beta: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmv multiplies the scalar \\f$\\alpha\\f$ with a sparse\n  \\f$(mb \\cdot \\text{block_dim}) \\times (nb \\cdot \\text{block_dim})\\f$\n  matrix, defined in BSR storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none is supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nb          number of block columns of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[in]\n  x           array of \\p nb*block_dim elements (\\f$op(A) = A\\f$) or \\p mb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p mb*block_dim elements (\\f$op(A) = A\\f$) or \\p nb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb or \\p block_dim is\n              invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ind, \\p bsr_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sbsrmv(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        x: *const f32,
-        beta: *const f32,
-        y: *mut f32,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrmv(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        x: *const f64,
-        beta: *const f64,
-        y: *mut f64,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrmv(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        x: *const rocsparse_float_complex,
-        beta: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrmv(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        x: *const rocsparse_double_complex,
-        beta: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication with mask operation using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrxmv multiplies the scalar \\f$\\alpha\\f$ with a sparse\n  \\f$(mb \\cdot \\text{block_dim}) \\times (nb \\cdot \\text{block_dim})\\f$\n  modified matrix, defined in BSR storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\left( \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y \\right)\\left( \\text{mask} \\right),\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  The \\f$\\text{mask}\\f$ is defined as an array of block row indices.\n  The input sparse matrix is defined with a modified BSR storage format where the beginning and the end of each row\n  is defined with two arrays, \\p bsr_row_ptr and \\p bsr_end_ptr (both of size \\p mb), rather the usual \\p bsr_row_ptr of size \\p mb+1.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none is supported.\n  Currently, \\p block_dim==1 is not supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  size_of_mask number of updated block rows of the array \\p y.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nb          number of block columns of the sparse BSR matrix.\n  
@param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n\n  @param[in]\n  bsr_mask_ptr array of \\p size_of_mask elements that give the indices of the updated block rows.\n\n  @param[in]\n  bsr_row_ptr array of \\p mb elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_end_ptr array of \\p mb elements that point to the end of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[in]\n  x           array of \\p nb*block_dim elements (\\f$op(A) = A\\f$) or \\p mb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p mb*block_dim elements (\\f$op(A) = A\\f$) or \\p nb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb, \\p block_dim or \\p size_of_mask is\n              invalid.\n  \\retval     rocsparse_status_invalid_value \\p size_of_mask is greater than \\p mb.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ind, \\p bsr_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not 
supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p block_dim==1, \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sbsrxmv(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        size_of_mask: rocsparse_int,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_mask_ptr: *const rocsparse_int,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_end_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        x: *const f32,
-        beta: *const f32,
-        y: *mut f32,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrxmv(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        size_of_mask: rocsparse_int,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_mask_ptr: *const rocsparse_int,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_end_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        x: *const f64,
-        beta: *const f64,
-        y: *mut f64,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrxmv(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        size_of_mask: rocsparse_int,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_mask_ptr: *const rocsparse_int,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_end_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        x: *const rocsparse_float_complex,
-        beta: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrxmv(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        size_of_mask: rocsparse_int,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_mask_ptr: *const rocsparse_int,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_end_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        x: *const rocsparse_double_complex,
-        beta: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsv_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_sbsrsv_solve(),\n  rocsparse_dbsrsv_solve(), rocsparse_cbsrsv_solve() or rocsparse_zbsrsv_solve()\n  computation. The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position,\n  using same index base as the BSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note \\p rocsparse_bsrsv_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
-    pub fn rocsparse_bsrsv_zero_pivot(
-        handle: rocsparse_handle,
-        info: rocsparse_mat_info,
-        position: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsv_buffer_size returns the size of the temporary storage buffer that\n  is required by rocsparse_sbsrsv_analysis(), rocsparse_dbsrsv_analysis(),\n  rocsparse_cbsrsv_analysis(), rocsparse_zbsrsv_analysis(), rocsparse_sbsrsv_solve(),\n  rocsparse_dbsrsv_solve(), rocsparse_cbsrsv_solve() and rocsparse_zbsrsv_solve(). The\n  temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnz containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_sbsrsv_analysis(), rocsparse_dbsrsv_analysis(),\n              rocsparse_cbsrsv_analysis(), rocsparse_zbsrsv_analysis(),\n              rocsparse_sbsrsv_solve(), rocsparse_dbsrsv_solve(),\n              
rocsparse_cbsrsv_solve() and rocsparse_zbsrsv_solve().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val, \\p bsr_row_ptr,\n              \\p bsr_col_ind, \\p info or \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sbsrsv_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrsv_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrsv_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrsv_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsv_analysis performs the analysis step for rocsparse_sbsrsv_solve(),\n  rocsparse_dbsrsv_solve(), rocsparse_cbsrsv_solve() and rocsparse_zbsrsv_solve(). It\n  is expected that this function will be executed only once for a given matrix and\n  particular operation type. The analysis meta data can be cleared by\n  rocsparse_bsrsv_clear().\n\n  \\p rocsparse_bsrsv_analysis can share its meta data with\n  rocsparse_sbsrsm_analysis(), rocsparse_dbsrsm_analysis(),\n  rocsparse_cbsrsm_analysis(), rocsparse_zbsrsm_analysis(),\n  rocsparse_sbsrilu0_analysis(), rocsparse_dbsrilu0_analysis(),\n  rocsparse_cbsrilu0_analysis(), rocsparse_zbsrilu0_analysis(),\n  rocsparse_sbsric0_analysis(), rocsparse_dbsric0_analysis(),\n  rocsparse_cbsric0_analysis() and rocsparse_zbsric0_analysis(). Selecting\n  \\ref rocsparse_analysis_policy_reuse policy can greatly improve computation\n  performance of meta data. However, the user need to make sure that the sparsity\n  pattern remains unchanged. 
If this cannot be assured,\n  \\ref rocsparse_analysis_policy_force has to be used.\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnz containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[out]\n  info        structure that holds the information collected during\n              the analysis step.\n  @param[in]\n  analysis    \\ref rocsparse_analysis_policy_reuse or\n              \\ref rocsparse_analysis_policy_force.\n  @param[in]\n  solve       \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_row_ptr,\n              \\p bsr_col_ind, \\p info or \\p temp_buffer pointer is invalid.\n  \\retval     
rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sbsrsv_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrsv_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrsv_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrsv_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmv_ex_clear deallocates all memory that was allocated by\n  rocsparse_sbsrmv_ex_analysis(), rocsparse_dbsrmv_ex_analysis(), rocsparse_cbsrmv_ex_analysis()\n  or rocsparse_zbsrmv_ex_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required anymore for further computation, e.g. when\n  switching to another sparse matrix format.\n\n  \\note\n  Calling \\p rocsparse_bsrmv_ex_clear is optional. All allocated resources will be\n  cleared, when the opaque \\ref rocsparse_mat_info struct is destroyed using\n  rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer for the gathered information\n              could not be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
-    pub fn rocsparse_bsrmv_ex_clear(
-        handle: rocsparse_handle,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsv_clear deallocates all memory that was allocated by\n  rocsparse_sbsrsv_analysis(), rocsparse_dbsrsv_analysis(), rocsparse_cbsrsv_analysis()\n  or rocsparse_zbsrsv_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required for further computation, e.g. when switching to\n  another sparse matrix format. Calling \\p rocsparse_bsrsv_clear is optional. All\n  allocated resources will be cleared, when the opaque \\ref rocsparse_mat_info struct\n  is destroyed using rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer holding the meta data could not\n              be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
-    pub fn rocsparse_bsrsv_clear(
-        handle: rocsparse_handle,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsv_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in BSR storage format, a dense solution vector\n  \\f$y\\f$ and the right-hand side \\f$x\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot y = \\alpha \\cdot x,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_bsrsv_solve requires a user allocated temporary buffer. Its size is\n  returned by rocsparse_sbsrsv_buffer_size(), rocsparse_dbsrsv_buffer_size(),\n  rocsparse_cbsrsv_buffer_size() or rocsparse_zbsrsv_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_sbsrsv_analysis(),\n  rocsparse_dbsrsv_analysis(), rocsparse_cbsrsv_analysis() or\n  rocsparse_zbsrsv_analysis(). \\p rocsparse_bsrsv_solve reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be checked calling\n  rocsparse_bsrsv_zero_pivot(). 
If\n  \\ref rocsparse_diag_type == \\ref rocsparse_diag_type_unit, no zero pivot will be\n  reported, even if \\f$A_{j,j} = 0\\f$ for some \\f$j\\f$.\n\n  \\note\n  The sparse BSR matrix has to be sorted.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none and\n  \\p trans == \\ref rocsparse_operation_transpose is supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnz containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  x           array of \\p m elements, holding the right-hand side.\n  @param[out]\n  y           array of \\p m elements, holding the solution.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     
rocsparse_status_invalid_size \\p mb, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p x or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the lower triangular \\f$m \\times m\\f$ matrix \\f$L\\f$, stored in BSR\n  storage format with unit diagonal. The following example solves \\f$L \\cdot y = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor\n      rocsparse_mat_descr descr;\n      rocsparse_create_mat_descr(&descr);\n      rocsparse_set_mat_fill_mode(descr, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr, rocsparse_diag_type_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size;\n      rocsparse_dbsrsv_buffer_size(handle,\n                                   rocsparse_direction_column,\n                                   rocsparse_operation_none,\n                                   mb,\n                                   nnzb,\n                                   descr,\n                                   bsr_val,\n                                   bsr_row_ptr,\n                                   bsr_col_ind,\n                                   block_dim,\n                                   info,\n                                   &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n  
    hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis step\n      rocsparse_dbsrsv_analysis(handle,\n                                rocsparse_direction_column,\n                                rocsparse_operation_none,\n                                mb,\n                                nnzb,\n                                descr,\n                                bsr_val,\n                                bsr_row_ptr,\n                                bsr_col_ind,\n                                block_dim,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Solve Ly = x\n      rocsparse_dbsrsv_solve(handle,\n                             rocsparse_direction_column,\n                             rocsparse_operation_none,\n                             mb,\n                             nnzb,\n                             &alpha,\n                             descr,\n                             bsr_val,\n                             bsr_row_ptr,\n                             bsr_col_ind,\n                             block_dim,\n                             info,\n                             x,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // No zero pivot should be found, with L having unit diagonal\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_sbsrsv_solve(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const f32,
-        y: *mut f32,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrsv_solve(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const f64,
-        y: *mut f64,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrsv_solve(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrsv_solve(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using COO storage format\n\n  \\details\n  \\p rocsparse_coomv multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix, defined in COO storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  The COO matrix has to be sorted by row indices. This can be achieved by using\n  rocsparse_coosort_by_row().\n\n  \\code{.c}\n      for(i = 0; i < m; ++i)\n      {\n          y[i] = beta * y[i];\n      }\n\n      for(i = 0; i < nnz; ++i)\n      {\n          y[coo_row_ind[i]] += alpha * coo_val[i] * x[coo_col_ind[i]];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse COO matrix.\n  @param[in]\n  n           number of columns of the sparse COO matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse COO matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse COO matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  coo_val     array of \\p nnz elements of the sparse COO matrix.\n  @param[in]\n  coo_row_ind array of \\p nnz elements containing the row indices of the sparse COO\n              matrix.\n  @param[in]\n  coo_col_ind array of \\p nnz elements containing the column indices of the sparse\n              COO matrix.\n  @param[in]\n  x           array of \\p n elements (\\f$op(A) = A\\f$) or \\p m elements\n              (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p m elements (\\f$op(A) = A\\f$) or \\p n elements\n              (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p coo_val,\n              \\p coo_row_ind, \\p coo_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_scoomv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        coo_val: *const f32,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        x: *const f32,
-        beta: *const f32,
-        y: *mut f32,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcoomv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        coo_val: *const f64,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        x: *const f64,
-        beta: *const f64,
-        y: *mut f64,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccoomv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        coo_val: *const rocsparse_float_complex,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        x: *const rocsparse_float_complex,
-        beta: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcoomv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        coo_val: *const rocsparse_double_complex,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        x: *const rocsparse_double_complex,
-        beta: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrmv_analysis performs the analysis step for rocsparse_scsrmv(),\n  rocsparse_dcsrmv(), rocsparse_ccsrmv() and rocsparse_zcsrmv(). It is expected that\n  this function will be executed only once for a given matrix and particular operation\n  type. The gathered analysis meta data can be cleared by rocsparse_csrmv_clear().\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr,\n              \\p csr_col_ind or \\p info pointer is invalid.\n  \\retval     
rocsparse_status_memory_error the buffer for the gathered information\n              could not be allocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented if \\ref rocsparse_matrix_type is not one of\n              \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric, or\n              \\ref rocsparse_matrix_type_triangular.\n/\n/**@{"]
-    pub fn rocsparse_scsrmv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsrmv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsrmv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsrmv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrmv_clear deallocates all memory that was allocated by\n  rocsparse_scsrmv_analysis(), rocsparse_dcsrmv_analysis(), rocsparse_ccsrmv_analysis()\n  or rocsparse_zcsrmv_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required anymore for further computation, e.g. when\n  switching to another sparse matrix format.\n\n  \\note\n  Calling \\p rocsparse_csrmv_clear is optional. All allocated resources will be\n  cleared, when the opaque \\ref rocsparse_mat_info struct is destroyed using\n  rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer for the gathered information\n              could not be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
-    pub fn rocsparse_csrmv_clear(
-        handle: rocsparse_handle,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrmv multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix, defined in CSR storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  The \\p info parameter is optional and contains information collected by\n  rocsparse_scsrmv_analysis(), rocsparse_dcsrmv_analysis(), rocsparse_ccsrmv_analysis()\n  or rocsparse_zcsrmv_analysis(). If present, the information will be used to speed up\n  the \\p csrmv computation. 
If \\p info == \\p NULL, general \\p csrmv routine will be\n  used instead.\n\n  \\code{.c}\n      for(i = 0; i < m; ++i)\n      {\n          y[i] = beta * y[i];\n\n          for(j = csr_row_ptr[i]; j < csr_row_ptr[i + 1]; ++j)\n          {\n              y[i] = y[i] + alpha * csr_val[j] * x[csr_col_ind[j]];\n          }\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  info        information collected by rocsparse_scsrmv_analysis(),\n              rocsparse_dcsrmv_analysis(), rocsparse_ccsrmv_analysis() or\n              rocsparse_dcsrmv_analysis(), can be \\p NULL if no information is\n              available.\n  @param[in]\n  x           array of \\p n elements (\\f$op(A) == A\\f$) or \\p m elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p m elements (\\f$op(A) == A\\f$) or \\p n elements\n              (\\f$op(A) == 
A^T\\f$ or \\f$op(A) == A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p x, \\p beta or \\p y pointer is\n              invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example performs a sparse matrix vector multiplication in CSR format\n  using additional meta data to improve performance.\n  \\code{.c}\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Perform analysis step to obtain meta data\n      rocsparse_scsrmv_analysis(handle,\n                                rocsparse_operation_none,\n                                m,\n                                n,\n                                nnz,\n                                descr,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info);\n\n      // Compute y = Ax\n      rocsparse_scsrmv(handle,\n                       rocsparse_operation_none,\n                       m,\n                       n,\n                       nnz,\n                       &alpha,\n                       descr,\n                       csr_val,\n                       csr_row_ptr,\n                       csr_col_ind,\n                       info,\n                       x,\n                       &beta,\n                       y);\n\n      // Do more work\n      // ...\n\n      // Clean up\n      
rocsparse_destroy_mat_info(info);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsrmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const f32,
-        beta: *const f32,
-        y: *mut f32,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsrmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const f64,
-        beta: *const f64,
-        y: *mut f64,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsrmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const rocsparse_float_complex,
-        beta: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsrmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const rocsparse_double_complex,
-        beta: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsv_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_scsrsv_solve(),\n  rocsparse_dcsrsv_solve(), rocsparse_ccsrsv_solve() or rocsparse_zcsrsv_solve()\n  computation. The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position,\n  using same index base as the CSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note \\p rocsparse_csrsv_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
-    pub fn rocsparse_csrsv_zero_pivot(
-        handle: rocsparse_handle,
-        descr: rocsparse_mat_descr,
-        info: rocsparse_mat_info,
-        position: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse iterative triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csritsv_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_csritsv_solve() and or rocsparse_csritsv_analysis(),\n  execution. The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position,\n  using same index base as the CSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note \\p rocsparse_csritsv_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
-    pub fn rocsparse_csritsv_zero_pivot(
-        handle: rocsparse_handle,
-        descr: rocsparse_mat_descr,
-        info: rocsparse_mat_info,
-        position: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsv_buffer_size returns the size of the temporary storage buffer that\n  is required by rocsparse_scsrsv_analysis(), rocsparse_dcsrsv_analysis(),\n  rocsparse_ccsrsv_analysis(), rocsparse_zcsrsv_analysis(), rocsparse_scsrsv_solve(),\n  rocsparse_dcsrsv_solve(), rocsparse_ccsrsv_solve() and rocsparse_zcsrsv_solve(). The\n  temporary storage buffer must be allocated by the user. The size of the temporary\n  storage buffer is identical to the size returned by rocsparse_scsrilu0_buffer_size(),\n  rocsparse_dcsrilu0_buffer_size(), rocsparse_ccsrilu0_buffer_size() and\n  rocsparse_zcsrilu0_buffer_size() if the matrix sparsity pattern is identical. The\n  user allocated buffer can thus be shared between subsequent calls to those functions.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n            
  rocsparse_scsrsv_analysis(), rocsparse_dcsrsv_analysis(),\n              rocsparse_ccsrsv_analysis(), rocsparse_zcsrsv_analysis(),\n              rocsparse_scsrsv_solve(), rocsparse_dcsrsv_solve(),\n              rocsparse_ccsrsv_solve() and rocsparse_zcsrsv_solve().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p info or \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_scsrsv_buffer_size(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsrsv_buffer_size(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsrsv_buffer_size(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsrsv_buffer_size(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse iterative triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csritsv_buffer_size returns the size of the temporary storage buffer that\n  is required by rocsparse_scsritsv_analysis(), rocsparse_dcsritsv_analysis(),\n  rocsparse_ccsritsv_analysis(), rocsparse_zcsritsv_analysis(), rocsparse_scsritsv_solve(),\n  rocsparse_dcsritsv_solve(), rocsparse_ccsritsv_solve() and rocsparse_zcsritsv_solve(). The\n  temporary storage buffer must be allocated by the user.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scsritsv_analysis(), rocsparse_dcsritsv_analysis(),\n              rocsparse_ccsritsv_analysis(), rocsparse_zcsritsv_analysis(),\n              rocsparse_scsritsv_solve(), rocsparse_dcsritsv_solve(),\n              rocsparse_ccsritsv_solve() and rocsparse_zcsritsv_solve().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     
rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p info or \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general and \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_triangular.\n/\n/**@{"]
-    pub fn rocsparse_scsritsv_buffer_size(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsritsv_buffer_size(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsritsv_buffer_size(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsritsv_buffer_size(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsv_analysis performs the analysis step for rocsparse_scsrsv_solve(),\n  rocsparse_dcsrsv_solve(), rocsparse_ccsrsv_solve() and rocsparse_zcsrsv_solve(). It\n  is expected that this function will be executed only once for a given matrix and\n  particular operation type. The analysis meta data can be cleared by\n  rocsparse_csrsv_clear().\n\n  \\p rocsparse_csrsv_analysis can share its meta data with\n  rocsparse_scsrsm_analysis(), rocsparse_dcsrsm_analysis(),\n  rocsparse_ccsrsm_analysis(), rocsparse_zcsrsm_analysis(),\n  rocsparse_scsrilu0_analysis(), rocsparse_dcsrilu0_analysis(),\n  rocsparse_ccsrilu0_analysis(), rocsparse_zcsrilu0_analysis(),\n  rocsparse_scsric0_analysis(), rocsparse_dcsric0_analysis(),\n  rocsparse_ccsric0_analysis() and rocsparse_zcsric0_analysis(). Selecting\n  \\ref rocsparse_analysis_policy_reuse policy can greatly improve computation\n  performance of meta data. However, the user need to make sure that the sparsity\n  pattern remains unchanged. 
If this cannot be assured,\n  \\ref rocsparse_analysis_policy_force has to be used.\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during\n              the analysis step.\n  @param[in]\n  analysis    \\ref rocsparse_analysis_policy_reuse or\n              \\ref rocsparse_analysis_policy_force.\n  @param[in]\n  solve       \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p info or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref 
rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_scsrsv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsrsv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsrsv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsrsv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse iterative triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csritsv_analysis performs the analysis step for rocsparse_scsritsv_solve(),\n  rocsparse_dcsritsv_solve(), rocsparse_ccsritsv_solve() and rocsparse_zcsritsv_solve(). It\n  is expected that this function will be executed only once for a given matrix and\n  particular operation type. The analysis meta data can be cleared by\n  rocsparse_csritsv_clear().\n\n   Selecting\n  \\ref rocsparse_analysis_policy_reuse policy can greatly improve computation\n  performance of meta data. However, the user need to make sure that the sparsity\n  pattern remains unchanged. If this cannot be assured,\n  \\ref rocsparse_analysis_policy_force has to be used.\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during\n              the analysis step.\n  @param[in]\n  analysis    \\ref rocsparse_analysis_policy_reuse or\n              \\ref rocsparse_analysis_policy_force.\n  @param[in]\n  solve       \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage 
buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p info or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general and \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_triangular.\n/\n/**@{"]
-    pub fn rocsparse_scsritsv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsritsv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsritsv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsritsv_analysis(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsv_clear deallocates all memory that was allocated by\n  rocsparse_scsrsv_analysis(), rocsparse_dcsrsv_analysis(), rocsparse_ccsrsv_analysis()\n  or rocsparse_zcsrsv_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required for further computation, e.g. when switching to\n  another sparse matrix format. Calling \\p rocsparse_csrsv_clear is optional. All\n  allocated resources will be cleared, when the opaque \\ref rocsparse_mat_info struct\n  is destroyed using rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[inout]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer holding the meta data could not\n              be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
-    pub fn rocsparse_csrsv_clear(
-        handle: rocsparse_handle,
-        descr: rocsparse_mat_descr,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csritsv_clear deallocates all memory that was allocated by\n  rocsparse_scsritsv_analysis(), rocsparse_dcsritsv_analysis(), rocsparse_ccsritsv_analysis()\n  or rocsparse_zcsritsv_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required for further computation, e.g. when switching to\n  another sparse matrix format. Calling \\p rocsparse_csritsv_clear is optional. All\n  allocated resources will be cleared, when the opaque \\ref rocsparse_mat_info struct\n  is destroyed using rocsparse_destroy_mat_info().\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[inout]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer holding the meta data could not\n              be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
-    pub fn rocsparse_csritsv_clear(
-        handle: rocsparse_handle,
-        descr: rocsparse_mat_descr,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsv_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR storage format, a dense solution vector\n  \\f$y\\f$ and the right-hand side \\f$x\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot y = \\alpha \\cdot x,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_csrsv_solve requires a user allocated temporary buffer. Its size is\n  returned by rocsparse_scsrsv_buffer_size(), rocsparse_dcsrsv_buffer_size(),\n  rocsparse_ccsrsv_buffer_size() or rocsparse_zcsrsv_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_scsrsv_analysis(),\n  rocsparse_dcsrsv_analysis(), rocsparse_ccsrsv_analysis() or\n  rocsparse_zcsrsv_analysis(). \\p rocsparse_csrsv_solve reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be checked calling\n  rocsparse_csrsv_zero_pivot(). If\n  \\ref rocsparse_diag_type == \\ref rocsparse_diag_type_unit, no zero pivot will be\n  reported, even if \\f$A_{j,j} = 0\\f$ for some \\f$j\\f$.\n\n  \\note\n  The sparse CSR matrix has to be sorted. 
This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none and\n  \\p trans == \\ref rocsparse_operation_transpose is supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  x           array of \\p m elements, holding the right-hand side.\n  @param[out]\n  y           array of \\p m elements, holding the solution.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p x or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  
\\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the lower triangular \\f$m \\times m\\f$ matrix \\f$L\\f$, stored in CSR\n  storage format with unit diagonal. The following example solves \\f$L \\cdot y = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor\n      rocsparse_mat_descr descr;\n      rocsparse_create_mat_descr(&descr);\n      rocsparse_set_mat_fill_mode(descr, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr, rocsparse_diag_type_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size;\n      rocsparse_dcsrsv_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nnz,\n                                   descr,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n                                   &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis step\n      rocsparse_dcsrsv_analysis(handle,\n                                rocsparse_operation_none,\n                                m,\n                                nnz,\n                                descr,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n           
                     info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Solve Ly = x\n      rocsparse_dcsrsv_solve(handle,\n                             rocsparse_operation_none,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             x,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // No zero pivot should be found, with L having unit diagonal\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsrsv_solve(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const f32,
-        y: *mut f32,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsrsv_solve(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const f64,
-        y: *mut f64,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsrsv_solve(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsrsv_solve(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse iterative triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csritsv_solve solves iteratively with the use of the Jacobi method a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR storage format, a dense solution vector\n  \\f$y\\f$ and the right-hand side \\f$x\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot y = \\alpha \\cdot x,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_csritsv_solve requires a user allocated temporary buffer. Its size is\n  returned by rocsparse_scsritsv_buffer_size(), rocsparse_dcsritsv_buffer_size(),\n  rocsparse_ccsritsv_buffer_size() or rocsparse_zcsritsv_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_scsritsv_analysis(),\n  rocsparse_dcsritsv_analysis(), rocsparse_ccsritsv_analysis() or\n  rocsparse_zcsritsv_analysis(). \\p rocsparse_csritsv_solve reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be checked calling\n  rocsparse_csritsv_zero_pivot(). If\n  \\ref rocsparse_diag_type == \\ref rocsparse_diag_type_unit, no zero pivot will be\n  reported, even if \\f$A_{j,j} = 0\\f$ for some \\f$j\\f$.\n\n  \\note\n  The sparse CSR matrix has to be sorted. 
This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  host_nmaxiter     maximum number of iteration on input and maximum number of iteration on output.\n  @param[in]\n  host_tol          if the pointer is null then loop will execute \\p nmaxiter[0] iterations.\n  @param[out]\n  host_history      (optional, record history)\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  x           array of \\p m elements, holding the right-hand side.\n  @param[out]\n  y           array of \\p m elements, holding the solution.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p csr_val,\n              \\p csr_row_ptr, \\p 
csr_col_ind, \\p x or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general and \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_triangular.\n\n  \\par Example\n  Consider the lower triangular \\f$m \\times m\\f$ matrix \\f$L\\f$, stored in CSR\n  storage format with unit diagonal. The following example solves \\f$L \\cdot y = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor\n      rocsparse_mat_descr descr;\n      rocsparse_create_mat_descr(&descr);\n      rocsparse_set_mat_fill_mode(descr, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr, rocsparse_diag_type_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size;\n      rocsparse_dcsritsv_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nnz,\n                                   descr,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n                                   &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis step\n      rocsparse_dcsritsv_analysis(handle,\n                                rocsparse_operation_none,\n                                m,\n                                nnz,\n                                descr,\n                 
               csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Solve Ly = x\n      rocsparse_int nmaxiter = 200;\n      rocsparse_int maxiter = nmaxiter;\n      tol = 1.0e-4;\n      history[200];\n      rocsparse_dcsritsv_solve(handle,\n                             &maxiter,\n                             &tol,\n                             history,\n                             rocsparse_operation_none,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             x,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      if (maxiter < nmaxiter) {} // convergence\n      else {} // non converged\n      for (int i=0;i<maxiter;++i) printf(\"iter = %d, max residual=%e\\n\", iter, history[i]);\n      // No zero pivot should be found, with L having unit diagonal\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsritsv_solve(
-        handle: rocsparse_handle,
-        host_nmaxiter: *mut rocsparse_int,
-        host_tol: *const f32,
-        host_history: *mut f32,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const f32,
-        y: *mut f32,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsritsv_solve(
-        handle: rocsparse_handle,
-        host_nmaxiter: *mut rocsparse_int,
-        host_tol: *const f64,
-        host_history: *mut f64,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const f64,
-        y: *mut f64,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsritsv_solve(
-        handle: rocsparse_handle,
-        host_nmaxiter: *mut rocsparse_int,
-        host_tol: *const f32,
-        host_history: *mut f32,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsritsv_solve(
-        handle: rocsparse_handle,
-        host_nmaxiter: *mut rocsparse_int,
-        host_tol: *const f64,
-        host_history: *mut f64,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        x: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using ELL storage format\n\n  \\details\n  \\p rocsparse_ellmv multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix, defined in ELL storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < m; ++i)\n      {\n          y[i] = beta * y[i];\n\n          for(p = 0; p < ell_width; ++p)\n          {\n              idx = p * m + i;\n\n              if((ell_col_ind[idx] >= 0) && (ell_col_ind[idx] < n))\n              {\n                  y[i] = y[i] + alpha * ell_val[idx] * x[ell_col_ind[idx]];\n              }\n          }\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse ELL matrix.\n  @param[in]\n  n           number of columns of the sparse ELL matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse ELL matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  ell_val     array that contains the elements of the sparse ELL matrix. 
Padded\n              elements should be zero.\n  @param[in]\n  ell_col_ind array that contains the column indices of the sparse ELL matrix.\n              Padded column indices should be -1.\n  @param[in]\n  ell_width   number of non-zero elements per row of the sparse ELL matrix.\n  @param[in]\n  x           array of \\p n elements (\\f$op(A) == A\\f$) or \\p m elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p m elements (\\f$op(A) == A\\f$) or \\p n elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ell_width is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p ell_val,\n              \\p ell_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sellmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        ell_val: *const f32,
-        ell_col_ind: *const rocsparse_int,
-        ell_width: rocsparse_int,
-        x: *const f32,
-        beta: *const f32,
-        y: *mut f32,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dellmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        ell_val: *const f64,
-        ell_col_ind: *const rocsparse_int,
-        ell_width: rocsparse_int,
-        x: *const f64,
-        beta: *const f64,
-        y: *mut f64,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cellmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        ell_val: *const rocsparse_float_complex,
-        ell_col_ind: *const rocsparse_int,
-        ell_width: rocsparse_int,
-        x: *const rocsparse_float_complex,
-        beta: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zellmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        ell_val: *const rocsparse_double_complex,
-        ell_col_ind: *const rocsparse_int,
-        ell_width: rocsparse_int,
-        x: *const rocsparse_double_complex,
-        beta: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using HYB storage format\n\n  \\details\n  \\p rocsparse_hybmv multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix, defined in HYB storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse HYB matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  hyb         matrix in HYB storage format.\n  @param[in]\n  x           array of \\p n elements (\\f$op(A) == A\\f$) or \\p m elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p m elements (\\f$op(A) == A\\f$) or \\p n elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p hyb structure was not initialized with\n              valid matrix sizes.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p hyb, \\p x,\n              \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_invalid_value \\p hyb structure was not initialized\n              with a valid partitioning type.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_memory_error the buffer could not be allocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_shybmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        hyb: rocsparse_hyb_mat,
-        x: *const f32,
-        beta: *const f32,
-        y: *mut f32,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dhybmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        hyb: rocsparse_hyb_mat,
-        x: *const f64,
-        beta: *const f64,
-        y: *mut f64,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_chybmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        hyb: rocsparse_hyb_mat,
-        x: *const rocsparse_float_complex,
-        beta: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zhybmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        hyb: rocsparse_hyb_mat,
-        x: *const rocsparse_double_complex,
-        beta: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using GEBSR storage format\n\n  \\details\n  \\p rocsparse_gebsrmv multiplies the scalar \\f$\\alpha\\f$ with a sparse\n  \\f$(mb \\cdot \\text{row_block_dim}) \\times (nb \\cdot \\text{col_block_dim})\\f$\n  matrix, defined in GEBSR storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none is supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of GEBSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse GEBSR matrix.\n  @param[in]\n  nb          number of block columns of the sparse GEBSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse GEBSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse GEBSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse GEBSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse GEBSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnz containing the block column indices of the sparse\n              GEBSR matrix.\n  @param[in]\n  row_block_dim row block dimension of the sparse GEBSR matrix.\n  @param[in]\n  col_block_dim column block dimension of the sparse GEBSR matrix.\n  @param[in]\n  x           array of \\p nb*col_block_dim elements (\\f$op(A) = A\\f$) or \\p mb*row_block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p mb*row_block_dim elements (\\f$op(A) = A\\f$) or \\p nb*col_block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb, \\p row_block_dim\n              or \\p col_block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ind, \\p bsr_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sgebsrmv(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans: rocsparse_operation,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
+        bsr_descr: rocsparse_mat_descr,
         bsr_val: *const f32,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
         row_block_dim: rocsparse_int,
         col_block_dim: rocsparse_int,
-        x: *const f32,
-        beta: *const f32,
-        y: *mut f32,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut f32,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dgebsrmv(
+    pub fn rocsparse_dgebsr2csr(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
-        trans: rocsparse_operation,
         mb: rocsparse_int,
         nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
+        bsr_descr: rocsparse_mat_descr,
         bsr_val: *const f64,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
         row_block_dim: rocsparse_int,
         col_block_dim: rocsparse_int,
-        x: *const f64,
-        beta: *const f64,
-        y: *mut f64,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut f64,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_cgebsrmv(
+    pub fn rocsparse_cgebsr2csr(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
-        trans: rocsparse_operation,
         mb: rocsparse_int,
         nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
+        bsr_descr: rocsparse_mat_descr,
         bsr_val: *const rocsparse_float_complex,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
         row_block_dim: rocsparse_int,
         col_block_dim: rocsparse_int,
-        x: *const rocsparse_float_complex,
-        beta: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut rocsparse_float_complex,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_zgebsrmv(
+    pub fn rocsparse_zgebsr2csr(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
-        trans: rocsparse_operation,
         mb: rocsparse_int,
         nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
+        bsr_descr: rocsparse_mat_descr,
         bsr_val: *const rocsparse_double_complex,
         bsr_row_ptr: *const rocsparse_int,
         bsr_col_ind: *const rocsparse_int,
         row_block_dim: rocsparse_int,
         col_block_dim: rocsparse_int,
-        x: *const rocsparse_double_complex,
-        beta: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
+        csr_descr: rocsparse_mat_descr,
+        csr_val: *mut rocsparse_double_complex,
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Dense matrix sparse vector multiplication\n\n  \\details\n  \\p rocsparse_gemvi_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_sgemvi(), rocsparse_dgemvi(), rocsparse_cgemvi() or\n  rocsparse_zgemvi(). The temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the dense matrix.\n  @param[in]\n  n           number of columns of the dense matrix.\n  @param[in]\n  nnz         number of non-zero entries in the sparse vector.\n  @param[out]\n  buffer_size temporary storage buffer size.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n, or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sgemvi_buffer_size(
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse GEneral BSR matrix into a sparse GEneral BSC matrix\n\n  \\details\n  \\p rocsparse_gebsr2gebsc_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_sgebsr2gebsc(), rocsparse_dgebsr2gebsc(), rocsparse_cgebsr2gebsc() and\n  rocsparse_zgebsr2gebsc(). The temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  mb           number of rows of the sparse GEneral BSR matrix.\n  @param[in]\n  nb           number of columns of the sparse GEneral BSR matrix.\n  @param[in]\n  nnzb         number of non-zero entries of the sparse GEneral BSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb*row_block_dim*col_block_dim containing the values of the sparse GEneral BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every row of the\n              sparse GEneral BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the column indices of the sparse\n              GEneral BSR matrix.\n  @param[in]\n  row_block_dim   row size of the blocks in the sparse general BSR matrix.\n  @param[in]\n  col_block_dim   col size of the blocks in the sparse general BSR matrix.\n\n  @param[out]\n  p_buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_sgebsr2gebsc(), rocsparse_dgebsr2gebsc(), rocsparse_cgebsr2gebsc() and\n              rocsparse_zgebsr2gebsc().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     
rocsparse_status_invalid_size \\p mb, \\p nb or \\p nnzb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_row_ptr, \\p bsr_col_ind or\n              \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    pub fn rocsparse_sgebsr2gebsc_buffer_size(
         handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        p_buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dgebsr2gebsc_buffer_size(
+        handle: rocsparse_handle,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        p_buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cgebsr2gebsc_buffer_size(
+        handle: rocsparse_handle,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        bsr_val: *const rocsparse_float_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        p_buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zgebsr2gebsc_buffer_size(
+        handle: rocsparse_handle,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        bsr_val: *const rocsparse_double_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        p_buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse GEneral BSR matrix into a sparse GEneral BSC matrix\n\n  \\details\n  \\p rocsparse_gebsr2gebsc converts a GEneral BSR matrix into a GEneral BSC matrix. \\p rocsparse_gebsr2gebsc\n  can also be used to convert a GEneral BSC matrix into a GEneral BSR matrix. \\p copy_values decides\n  whether \\p bsc_val is being filled during conversion (\\ref rocsparse_action_numeric)\n  or not (\\ref rocsparse_action_symbolic).\n\n  \\p rocsparse_gebsr2gebsc requires extra temporary storage buffer that has to be allocated\n  by the user. Storage buffer size can be determined by rocsparse_gebsr2gebsc_buffer_size().\n\n  \\note\n  The resulting matrix can also be seen as the transpose of the input matrix.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  mb          number of rows of the sparse GEneral BSR matrix.\n  @param[in]\n  nb          number of columns of the sparse GEneral BSR matrix.\n  @param[in]\n  nnzb        number of non-zero entries of the sparse GEneral BSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb * \\p row_block_dim * \\p col_block_dim  elements of the sparse GEneral BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse GEneral BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              GEneral BSR matrix.\n  @param[in]\n  row_block_dim   row size of the blocks in the sparse general BSR matrix.\n  @param[in]\n  col_block_dim   col size of the blocks in the sparse general BSR matrix.\n  @param[out]\n  bsc_val     array of \\p nnz elements of the sparse BSC matrix.\n  
@param[out]\n  bsc_row_ind array of \\p nnz elements containing the row indices of the sparse BSC\n              matrix.\n  @param[out]\n  bsc_col_ptr array of \\p n+1 elements that point to the start of every column of the\n              sparse BSC matrix.\n  @param[in]\n  copy_values \\ref rocsparse_action_symbolic or \\ref rocsparse_action_numeric.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user, size is returned by\n              rocsparse_gebsr2gebsc_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb or \\p nnzb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_val, \\p bsr_row_ptr,\n              \\p bsr_col_ind, \\p bsc_val, \\p bsc_row_ind, \\p bsc_col_ptr or\n              \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n  \\par Example\n  This example computes the transpose of a GEneral BSR matrix.\n  \\code{.c}\n      //     1 2 0 3\n      // A = 0 4 5 0\n      //     6 0 0 7\n      //     1 2 3 4\n\n      rocsparse_int mb_A   = 2;\n      rocsparse_int row_block_dim = 2;\n      rocsparse_int col_block_dim = 2;\n      rocsparse_int nb_A   = 2;\n      rocsparse_int nnzb_A = 4;\n\n      bsr_row_ptr_A[mb_A+1] = {0, 2, 4};               // device memory\n      bsr_col_ind_A[nnzb_A] = {0, 1, 0, 1}; // device memory\n      bsr_val_A[nnzb_A]     = {1, 0, 2, 4, 0, 5, 3, 0, 6, 1, 0, 2, 0, 3, 7, 4}; // device memory\n\n      // Allocate memory for transposed BSR matrix\n      rocsparse_int mb_T   = nb_A;\n      rocsparse_int nb_T   = mb_A;\n      rocsparse_int nnzb_T = nnzb_A;\n\n      
rocsparse_int* bsr_row_ptr_T;\n      rocsparse_int* bsr_col_ind_T;\n      float* bsr_val_T;\n\n      hipMalloc((void**)&bsr_row_ptr_T, sizeof(rocsparse_int) * (mb_T + 1));\n      hipMalloc((void**)&bsr_col_ind_T, sizeof(rocsparse_int) * nnzb_T);\n      hipMalloc((void**)&bsr_val_T, sizeof(float) * nnzb_T);\n\n      // Obtain the temporary buffer size\n      size_t buffer_size;\n      rocsparse_gebsr2gebsc_buffer_size(handle,\n                                    mb_A,\n                                    nb_A,\n                                    nnzb_A,\n                                    bsr_row_ptr_A,\n                                    bsr_col_ind_A,\n                                    rocsparse_action_numeric,\n                                    &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      rocsparse_sgebsr2gebsc(handle,\n                         mb_A,\n                         nb_A,\n                         nnzb_A,\n                         bsr_val_A,\n                         bsr_row_ptr_A,\n                         bsr_col_ind_A,\n                         row_block_dim,\n                         col_block_dim,\n                         bsr_val_T,\n                         bsr_col_ind_T,\n                         bsr_row_ptr_T,\n                         rocsparse_action_numeric,\n                         rocsparse_index_base_zero,\n                         temp_buffer);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_sgebsr2gebsc(
+        handle: rocsparse_handle,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *mut f32,
+        bsc_row_ind: *mut rocsparse_int,
+        bsc_col_ptr: *mut rocsparse_int,
+        copy_values: rocsparse_action,
+        idx_base: rocsparse_index_base,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dgebsr2gebsc(
+        handle: rocsparse_handle,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *mut f64,
+        bsc_row_ind: *mut rocsparse_int,
+        bsc_col_ptr: *mut rocsparse_int,
+        copy_values: rocsparse_action,
+        idx_base: rocsparse_index_base,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cgebsr2gebsc(
+        handle: rocsparse_handle,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        bsr_val: *const rocsparse_float_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *mut rocsparse_float_complex,
+        bsc_row_ind: *mut rocsparse_int,
+        bsc_col_ptr: *mut rocsparse_int,
+        copy_values: rocsparse_action,
+        idx_base: rocsparse_index_base,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zgebsr2gebsc(
+        handle: rocsparse_handle,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        bsr_val: *const rocsparse_double_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *mut rocsparse_double_complex,
+        bsc_row_ind: *mut rocsparse_int,
+        bsc_col_ptr: *mut rocsparse_int,
+        copy_values: rocsparse_action,
+        idx_base: rocsparse_index_base,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the size of the user allocated temporary storage buffer used when converting a sparse\n  general BSR matrix to another sparse general BSR matrix.\n\n  \\details\n  \\p rocsparse_gebsr2gebsr_buffer_size returns the size of the temporary storage buffer\n  that is required by rocsparse_gebsr2gebsr_nnz(), rocsparse_sgebsr2gebsr(), rocsparse_dgebsr2gebsr(),\n  rocsparse_cgebsr2gebsr(), and rocsparse_zgebsr2gebsr(). The temporary\n  storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n\n  @param[in]\n  mb           number of block rows of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nb           number of block columns of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nnzb         number of blocks in the general BSR sparse matrix \\p A.\n\n  @param[in]\n  descr_A      the descriptor of the general BSR sparse matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  bsr_val_A    array of \\p nnzb*row_block_dim_A*col_block_dim_A containing the values of the sparse general BSR matrix \\p A.\n\n  @param[in]\n  bsr_row_ptr_A array of \\p mb+1 elements that point to the start of every block row of the\n              sparse general BSR matrix \\p A.\n  @param[in]\n  bsr_col_ind_A array of \\p nnzb elements containing the block column indices of the sparse general BSR matrix \\p A.\n\n  @param[in]\n  row_block_dim_A   row size of the blocks in the sparse general BSR 
matrix \\p A.\n\n  @param[in]\n  col_block_dim_A   column size of the blocks in the sparse general BSR matrix \\p A.\n\n  @param[in]\n  row_block_dim_C   row size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[in]\n  col_block_dim_C   column size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by rocsparse_gebsr2gebsr_nnz(),\n              rocsparse_sgebsr2gebsr(), rocsparse_dgebsr2gebsr(), rocsparse_cgebsr2gebsr(), and rocsparse_zgebsr2gebsr().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb or \\p nb or \\p nnzb or \\p row_block_dim_A or\n              \\p col_block_dim_A or \\p row_block_dim_C or \\p col_block_dim_C is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_row_ptr_A or \\p bsr_col_ind_A\n              or \\p descr_A or \\p buffer_size pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sgebsr2gebsr_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        bsr_val_A: *const f32,
+        bsr_row_ptr_A: *const rocsparse_int,
+        bsr_col_ind_A: *const rocsparse_int,
+        row_block_dim_A: rocsparse_int,
+        col_block_dim_A: rocsparse_int,
+        row_block_dim_C: rocsparse_int,
+        col_block_dim_C: rocsparse_int,
         buffer_size: *mut usize,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dgemvi_buffer_size(
+    pub fn rocsparse_dgebsr2gebsr_buffer_size(
         handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        bsr_val_A: *const f64,
+        bsr_row_ptr_A: *const rocsparse_int,
+        bsr_col_ind_A: *const rocsparse_int,
+        row_block_dim_A: rocsparse_int,
+        col_block_dim_A: rocsparse_int,
+        row_block_dim_C: rocsparse_int,
+        col_block_dim_C: rocsparse_int,
         buffer_size: *mut usize,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_cgemvi_buffer_size(
+    pub fn rocsparse_cgebsr2gebsr_buffer_size(
         handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        bsr_val_A: *const rocsparse_float_complex,
+        bsr_row_ptr_A: *const rocsparse_int,
+        bsr_col_ind_A: *const rocsparse_int,
+        row_block_dim_A: rocsparse_int,
+        col_block_dim_A: rocsparse_int,
+        row_block_dim_C: rocsparse_int,
+        col_block_dim_C: rocsparse_int,
         buffer_size: *mut usize,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_zgemvi_buffer_size(
+    pub fn rocsparse_zgebsr2gebsr_buffer_size(
         handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        bsr_val_A: *const rocsparse_double_complex,
+        bsr_row_ptr_A: *const rocsparse_int,
+        bsr_col_ind_A: *const rocsparse_int,
+        row_block_dim_A: rocsparse_int,
+        col_block_dim_A: rocsparse_int,
+        row_block_dim_C: rocsparse_int,
+        col_block_dim_C: rocsparse_int,
         buffer_size: *mut usize,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup level2_module\n  \\brief Dense matrix sparse vector multiplication\n\n  \\details\n  \\p rocsparse_gemvi multiplies the scalar \\f$\\alpha\\f$ with a dense \\f$m \\times n\\f$\n  matrix \\f$A\\f$ and the sparse vector \\f$x\\f$ and adds the result to the dense vector\n  \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_gemvi requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_sgemvi_buffer_size(), rocsparse_dgemvi_buffer_size(),\n  rocsparse_cgemvi_buffer_size() or rocsparse_zgemvi_buffer_size().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none is supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the dense matrix.\n  @param[in]\n  n           number of columns of the dense matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  A           pointer to the dense matrix.\n  @param[in]\n  lda         leading dimension of the dense matrix\n  @param[in]\n  nnz         number of non-zero entries in the sparse vector\n  @param[in]\n  x_val       array of \\p nnz elements containing the values of the sparse vector\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the sparse vector\n  @param[in]\n  beta        scalar 
\\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p m elements (\\f$op(A) == A\\f$) or \\p n elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n  @param[in]\n  idx_base    rocsparse_index_base_zero or rocsparse_index_base_one.\n  @param[in]\n  temp_buffer temporary storage buffer\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n, \\p lda or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p A, \\p x_val, \\p x_ind,\n              \\p beta, \\p y or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sgemvi(
+    #[doc = " \\ingroup conv_module\n  \\brief This function is used when converting a general BSR sparse matrix \\p A to another general BSR sparse matrix \\p C.\n  Specifically, this function determines the number of non-zero blocks that will exist in \\p C (stored using either a host\n  or device pointer), and computes the row pointer array for \\p C.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n\n  @param[in]\n  mb           number of block rows of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nb           number of block columns of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nnzb         number of blocks in the general BSR sparse matrix \\p A.\n\n  @param[in]\n  descr_A      the descriptor of the general BSR sparse matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  bsr_row_ptr_A array of \\p mb+1 elements that point to the start of every block row of the\n              sparse general BSR matrix \\p A.\n  @param[in]\n  bsr_col_ind_A array of \\p nnzb elements containing the block column indices of the sparse general BSR matrix \\p A.\n\n  @param[in]\n  row_block_dim_A   row size of the blocks in the sparse general BSR matrix \\p A.\n\n  @param[in]\n  col_block_dim_A   column size of the blocks in the sparse general BSR matrix \\p A.\n\n  @param[in]\n  descr_C      the descriptor of the general BSR sparse matrix \\p C, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  bsr_row_ptr_C array of \\p mb_C+1 elements that point to the start of every block 
row of the\n              sparse general BSR matrix \\p C where \\p mb_C=(m+row_block_dim_C-1)/row_block_dim_C.\n  @param[in]\n  row_block_dim_C   row size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[in]\n  col_block_dim_C   column size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[out]\n  nnz_total_dev_host_ptr\n              total number of nonzero blocks in general BSR sparse matrix \\p C stored using device or host memory.\n\n  @param[out]\n  temp_buffer\n              buffer allocated by the user whose size is determined by calling rocsparse_xgebsr2gebsr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb or \\p nb or \\p nnzb or \\p row_block_dim_A or\n              \\p col_block_dim_A or \\p row_block_dim_C or \\p col_block_dim_C is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_row_ptr_A or \\p bsr_col_ind_A\n              or \\p bsr_row_ptr_C or \\p descr_A or \\p descr_C or \\p temp_buffer pointer is invalid."]
+    pub fn rocsparse_gebsr2gebsr_nnz(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        bsr_row_ptr_A: *const rocsparse_int,
+        bsr_col_ind_A: *const rocsparse_int,
+        row_block_dim_A: rocsparse_int,
+        col_block_dim_A: rocsparse_int,
+        descr_C: rocsparse_mat_descr,
+        bsr_row_ptr_C: *mut rocsparse_int,
+        row_block_dim_C: rocsparse_int,
+        col_block_dim_C: rocsparse_int,
+        nnz_total_dev_host_ptr: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the general BSR sparse matrix \\p A to another general BSR sparse matrix \\p C.\n\n  \\details\n  The conversion uses three steps. First, the user calls rocsparse_xgebsr2gebsr_buffer_size() to determine the size of\n  the required temporary storage buffer. The user then allocates this buffer. Secondly, the user then allocates \\p mb_C+1\n  integers for the row pointer array for \\p C where \\p mb_C=(m+row_block_dim_C-1)/row_block_dim_C. The user then calls\n  rocsparse_xgebsr2gebsr_nnz() to fill in the row pointer array for \\p C ( \\p bsr_row_ptr_C ) and determine the number of\n  non-zero blocks that will exist in \\p C. Finally, the user allocates space for the column indices array of \\p C to have\n  \\p nnzb_C elements and space for the values array of \\p C to have \\p nnzb_C*row_block_dim_C*col_block_dim_C and then calls\n  rocsparse_xgebsr2gebsr() to complete the conversion.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n\n  @param[in]\n  mb           number of block rows of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nb           number of block columns of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nnzb         number of blocks in the general BSR sparse matrix \\p A.\n\n  @param[in]\n  descr_A      the descriptor of the general BSR sparse matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  bsr_val_A    array of \\p nnzb*row_block_dim_A*col_block_dim_A containing the values of the sparse general BSR matrix \\p A.\n\n  @param[in]\n  bsr_row_ptr_A array of \\p 
mb+1 elements that point to the start of every block row of the\n              sparse general BSR matrix \\p A.\n  @param[in]\n  bsr_col_ind_A array of \\p nnzb elements containing the block column indices of the sparse general BSR matrix \\p A.\n\n  @param[in]\n  row_block_dim_A   row size of the blocks in the sparse general BSR matrix \\p A.\n\n  @param[in]\n  col_block_dim_A   column size of the blocks in the sparse general BSR matrix \\p A.\n\n  @param[in]\n  descr_C      the descriptor of the general BSR sparse matrix \\p C, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  bsr_val_C    array of \\p nnzb_C*row_block_dim_C*col_block_dim_C containing the values of the sparse general BSR matrix \\p C.\n\n  @param[in]\n  bsr_row_ptr_C array of \\p mb_C+1 elements that point to the start of every block row of the\n              sparse general BSR matrix \\p C.\n  @param[in]\n  bsr_col_ind_C array of \\p nnzb_C elements containing the block column indices of the sparse general BSR matrix \\p C.\n\n  @param[in]\n  row_block_dim_C   row size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[in]\n  col_block_dim_C   column size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[out]\n  temp_buffer\n              buffer allocated by the user whose size is determined by calling rocsparse_xgebsr2gebsr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb or \\p nb or \\p nnzb or \\p row_block_dim_A or\n              \\p col_block_dim_A or \\p row_block_dim_C or \\p col_block_dim_C is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_row_ptr_A or \\p bsr_col_ind_A or \\p bsr_val_A\n              or \\p bsr_row_ptr_C or \\p bsr_col_ind_C or \\p bsr_val_C or \\p 
descr_A or \\p descr_C\n              or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sgebsr2gebsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        bsr_val_A: *const f32,
+        bsr_row_ptr_A: *const rocsparse_int,
+        bsr_col_ind_A: *const rocsparse_int,
+        row_block_dim_A: rocsparse_int,
+        col_block_dim_A: rocsparse_int,
+        descr_C: rocsparse_mat_descr,
+        bsr_val_C: *mut f32,
+        bsr_row_ptr_C: *mut rocsparse_int,
+        bsr_col_ind_C: *mut rocsparse_int,
+        row_block_dim_C: rocsparse_int,
+        col_block_dim_C: rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dgebsr2gebsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        bsr_val_A: *const f64,
+        bsr_row_ptr_A: *const rocsparse_int,
+        bsr_col_ind_A: *const rocsparse_int,
+        row_block_dim_A: rocsparse_int,
+        col_block_dim_A: rocsparse_int,
+        descr_C: rocsparse_mat_descr,
+        bsr_val_C: *mut f64,
+        bsr_row_ptr_C: *mut rocsparse_int,
+        bsr_col_ind_C: *mut rocsparse_int,
+        row_block_dim_C: rocsparse_int,
+        col_block_dim_C: rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cgebsr2gebsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        bsr_val_A: *const rocsparse_float_complex,
+        bsr_row_ptr_A: *const rocsparse_int,
+        bsr_col_ind_A: *const rocsparse_int,
+        row_block_dim_A: rocsparse_int,
+        col_block_dim_A: rocsparse_int,
+        descr_C: rocsparse_mat_descr,
+        bsr_val_C: *mut rocsparse_float_complex,
+        bsr_row_ptr_C: *mut rocsparse_int,
+        bsr_col_ind_C: *mut rocsparse_int,
+        row_block_dim_C: rocsparse_int,
+        col_block_dim_C: rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zgebsr2gebsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        bsr_val_A: *const rocsparse_double_complex,
+        bsr_row_ptr_A: *const rocsparse_int,
+        bsr_col_ind_A: *const rocsparse_int,
+        row_block_dim_A: rocsparse_int,
+        col_block_dim_A: rocsparse_int,
+        descr_C: rocsparse_mat_descr,
+        bsr_val_C: *mut rocsparse_double_complex,
+        bsr_row_ptr_C: *mut rocsparse_int,
+        bsr_col_ind_C: *mut rocsparse_int,
+        row_block_dim_C: rocsparse_int,
+        col_block_dim_C: rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse HYB matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_hyb2csr_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_shyb2csr(), rocsparse_dhyb2csr(), rocsparse_chyb2csr() and\n  rocsparse_zhyb2csr(). The temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  descr           descriptor of the sparse HYB matrix. Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  hyb             sparse matrix in HYB format.\n  @param[in]\n  csr_row_ptr     array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_shyb2csr(), rocsparse_dhyb2csr(), rocsparse_chyb2csr() and\n                  rocsparse_zhyb2csr().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p hyb, \\p csr_row_ptr or\n              \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
+    pub fn rocsparse_hyb2csr_buffer_size(
+        handle: rocsparse_handle,
+        descr: rocsparse_mat_descr,
+        hyb: rocsparse_hyb_mat,
+        csr_row_ptr: *const rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration of the f32 HYB -> CSR conversion; the contract is spelled out in the #[doc] text below.
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse HYB matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_hyb2csr converts a HYB matrix into a CSR matrix.\n\n  \\p rocsparse_hyb2csr requires extra temporary storage buffer that has to be allocated\n  by the user. Storage buffer size can be determined by\n  rocsparse_hyb2csr_buffer_size().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  descr           descriptor of the sparse HYB matrix. Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  hyb             sparse matrix in HYB format.\n  @param[out]\n  csr_val         array containing the values of the sparse CSR matrix.\n  @param[out]\n  csr_row_ptr     array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix.\n  @param[out]\n  csr_col_ind     array containing the column indices of the sparse CSR matrix.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned by\n                  rocsparse_hyb2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p hyb, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example converts a HYB matrix into a CSR matrix.\n 
 \\code{.c}\n      // Create CSR matrix arrays\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n\n      hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnz);\n\n      // Get required size of temporary buffer\n      size_t size;\n      rocsparse_hyb2csr_buffer_size(handle,\n                                    descr,\n                                    hyb,\n                                    csr_row_ptr,\n                                    &size);\n\n      // Allocate temporary buffer\n      void* buffer;\n      hipMalloc(&buffer, size);\n\n      // Perform the conversion\n      rocsparse_shyb2csr(handle,\n                         descr,\n                         hyb,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind,\n                         buffer);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_shyb2csr(
+        handle: rocsparse_handle,
+        descr: rocsparse_mat_descr,
+        hyb: rocsparse_hyb_mat,
+        csr_val: *mut f32, // output values; the d/c/z siblings declared after this block change only this element type
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void, // scratch space sized via rocsparse_hyb2csr_buffer_size()
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: f64 counterpart of rocsparse_shyb2csr (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_dhyb2csr(
+        handle: rocsparse_handle,
+        descr: rocsparse_mat_descr,
+        hyb: rocsparse_hyb_mat,
+        csr_val: *mut f64, // the only parameter type that differs from the f32 variant
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: single-precision complex counterpart of rocsparse_shyb2csr (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_chyb2csr(
+        handle: rocsparse_handle,
+        descr: rocsparse_mat_descr,
+        hyb: rocsparse_hyb_mat,
+        csr_val: *mut rocsparse_float_complex, // the only parameter type that differs from the f32 variant
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: double-precision complex counterpart of rocsparse_shyb2csr (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_zhyb2csr(
+        handle: rocsparse_handle,
+        descr: rocsparse_mat_descr,
+        hyb: rocsparse_hyb_mat,
+        csr_val: *mut rocsparse_double_complex, // the only parameter type that differs from the f32 variant
+        csr_row_ptr: *mut rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration; per the #[doc] text below it fills `p` with the identity map p[i] = i.
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    #[doc = " \\ingroup conv_module\n  \\brief Create the identity map\n\n  \\details\n  \\p rocsparse_create_identity_permutation stores the identity map in \\p p, such that\n  \\f$p = 0:1:(n-1)\\f$.\n\n  \\code{.c}\n      for(i = 0; i < n; ++i)\n      {\n          p[i] = i;\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  n           size of the map \\p p.\n  @param[out]\n  p           array of \\p n integers containing the map.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p n is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p p pointer is invalid.\n\n  \\par Example\n  The following example creates an identity permutation.\n  \\code{.c}\n      rocsparse_int size = 200;\n\n      // Allocate memory to hold the identity map\n      rocsparse_int* perm;\n      hipMalloc((void**)&perm, sizeof(rocsparse_int) * size);\n\n      // Fill perm with the identity permutation\n      rocsparse_create_identity_permutation(handle, size, perm);\n  \\endcode"]
+    pub fn rocsparse_create_identity_permutation(
+        handle: rocsparse_handle,
+        n: rocsparse_int, // number of entries in `p`
+        p: *mut rocsparse_int, // output array of `n` integers
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Inverse a permutation vector.\n\n  \\details\n  \\p rocsparse_inverse_permutation computes\n\n  \\code{.c}\n      for(i = 0; i < n; ++i)\n      {\n          q[p[i]- base] = i + base;\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  n           size of the permutation vector \\p p.\n  @param[in]\n  p           array of \\p n integers containing the permutation vector to inverse.\n  @param[out]\n  q           array of \\p n integers containing the inverse of the permutation vector.\n  @param[in]\n  base        \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p n is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p p pointer is invalid or \\p q pointer is invalid.\n  \\retval     rocsparse_status_invalid_value \\p base is invalid."]
+    pub fn rocsparse_inverse_permutation(
+        handle: rocsparse_handle,
+        n: rocsparse_int,
+        p: *const rocsparse_int,
+        q: *mut rocsparse_int,
+        base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the number of nonzero elements per row or column and the total number of nonzero elements in a dense matrix.\n  \\details\n  The routine does support asynchronous execution if the pointer mode is set to device.\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir        direction that specifies whether to count nonzero elements by \\ref rocsparse_direction_row or by \\ref rocsparse_direction_column.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld         leading dimension of dense array \\p A.\n\n  @param[out]\n  nnz_per_row_columns\n              array of size \\p m or \\p n containing the number of nonzero elements per row or column, respectively.\n  @param[out]\n  nnz_total_dev_host_ptr\n              total number of nonzero elements in device or host memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p nnz_per_row_columns or \\p nnz_total_dev_host_ptr\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_snnz(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const f32,
+        ld: rocsparse_int,
+        nnz_per_row_columns: *mut rocsparse_int,
+        nnz_total_dev_host_ptr: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: f64 counterpart of rocsparse_snnz (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_dnnz(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const f64, // the only parameter type that differs from the f32 variant
+        ld: rocsparse_int,
+        nnz_per_row_columns: *mut rocsparse_int,
+        nnz_total_dev_host_ptr: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: single-precision complex counterpart of rocsparse_snnz (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_cnnz(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const rocsparse_float_complex, // the only parameter type that differs from the f32 variant
+        ld: rocsparse_int,
+        nnz_per_row_columns: *mut rocsparse_int,
+        nnz_total_dev_host_ptr: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: double-precision complex counterpart of rocsparse_snnz (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_znnz(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        A: *const rocsparse_double_complex, // the only parameter type that differs from the f32 variant
+        ld: rocsparse_int,
+        nnz_per_row_columns: *mut rocsparse_int,
+        nnz_total_dev_host_ptr: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration of the f32 per-row entry count for a tolerance-compressed CSR matrix; see the #[doc] text below.
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    #[doc = " \\ingroup conv_module\n  Given a sparse CSR matrix and a non-negative tolerance, this function computes how many entries would be left\n  in each row of the matrix if elements less than the tolerance were removed. It also computes the total number\n  of remaining elements in the matrix.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n\n  @param[in]\n  m             number of rows of the sparse CSR matrix.\n\n  @param[in]\n  descr_A       the descriptor of the sparse CSR matrix.\n\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                uncompressed sparse CSR matrix.\n  @param[out]\n  nnz_per_row   array of length \\p m containing the number of entries that will be kept per row in\n                the final compressed CSR matrix.\n  @param[out]\n  nnz_C         number of elements in the column indices and values arrays of the compressed\n                sparse CSR matrix. Can be either host or device pointer.\n  @param[in]\n  tol           the non-negative tolerance used for compression. If \\p tol is complex then only the magnitude\n                of the real part is used. 
Entries in the input uncompressed CSR array that are below the tolerance\n                are removed in output compressed CSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n is invalid.\n  \\retval     rocsparse_status_invalid_value \\p tol is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_val_A or \\p csr_row_ptr_A or \\p nnz_per_row or \\p nnz_C\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_snnz_compress(
+        handle: rocsparse_handle,
+        m: rocsparse_int, // NOTE(review): the invalid_size entry in the docs also names \p n, but this signature has no `n` parameter
+        descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f32,
+        csr_row_ptr_A: *const rocsparse_int,
+        nnz_per_row: *mut rocsparse_int,
+        nnz_C: *mut rocsparse_int, // documented as either a host or a device pointer
+        tol: f32, // passed by value, unlike the pointer-typed thresholds of the prune_csr2csr routines
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: f64 counterpart of rocsparse_snnz_compress (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_dnnz_compress(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f64, // element type differs from the f32 variant
+        csr_row_ptr_A: *const rocsparse_int,
+        nnz_per_row: *mut rocsparse_int,
+        nnz_C: *mut rocsparse_int,
+        tol: f64, // tolerance type follows the element type
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: single-precision complex counterpart of rocsparse_snnz_compress (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_cnnz_compress(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        csr_val_A: *const rocsparse_float_complex, // element type differs from the f32 variant
+        csr_row_ptr_A: *const rocsparse_int,
+        nnz_per_row: *mut rocsparse_int,
+        nnz_C: *mut rocsparse_int,
+        tol: rocsparse_float_complex, // complex tolerance; the rocsparse_snnz_compress docs say only the magnitude of the real part is used
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: double-precision complex counterpart of rocsparse_snnz_compress (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_znnz_compress(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        descr_A: rocsparse_mat_descr,
+        csr_val_A: *const rocsparse_double_complex, // element type differs from the f32 variant
+        csr_row_ptr_A: *const rocsparse_int,
+        nnz_per_row: *mut rocsparse_int,
+        nnz_C: *mut rocsparse_int,
+        tol: rocsparse_double_complex, // complex tolerance; the rocsparse_snnz_compress docs say only the magnitude of the real part is used
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration of the f32 buffer-size query for the prune_csr2csr routines; see the #[doc] text below.
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_prune_csr2csr_buffer_size returns the size of the temporary buffer that\n  is required by \\p rocsparse_sprune_csr2csr_nnz, \\p rocsparse_dprune_csr2csr_nnz,\n  \\p rocsparse_sprune_csr2csr, and \\p rocsparse_dprune_csr2csr. The temporary storage\n  buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  threshold     pointer to the non-negative pruning threshold which can exist in either host or device memory.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_C     array of \\p nnz_C elements containing the values of the sparse CSR matrix C.\n  @param[in]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[in]\n  csr_col_ind_C array of \\p nnz_C elements containing the column indices of the sparse CSR matrix C.\n  @param[out]\n  buffer_size   number of bytes of the temporary storage buffer required by \\p rocsparse_sprune_csr2csr_nnz,\n                \\p rocsparse_dprune_csr2csr_nnz, \\p rocsparse_sprune_csr2csr, and \\p rocsparse_dprune_csr2csr.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n/\n/**@{"]
+    pub fn rocsparse_sprune_csr2csr_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz_A: rocsparse_int,
+        csr_descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f32,
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        threshold: *const f32, // a pointer, not a value: documented as living in host or device memory
+        csr_descr_C: rocsparse_mat_descr,
+        csr_val_C: *const f32, // the C arrays are all const here; this call only queries a size
+        csr_row_ptr_C: *const rocsparse_int,
+        csr_col_ind_C: *const rocsparse_int,
+        buffer_size: *mut usize, // out: required temporary buffer size in bytes
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: f64 counterpart of rocsparse_sprune_csr2csr_buffer_size (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_dprune_csr2csr_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz_A: rocsparse_int,
+        csr_descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f64, // element type differs from the f32 variant
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        threshold: *const f64, // threshold type follows the element type
+        csr_descr_C: rocsparse_mat_descr,
+        csr_val_C: *const f64, // element type differs from the f32 variant
+        csr_row_ptr_C: *const rocsparse_int,
+        csr_col_ind_C: *const rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration of the f32 nnz-counting step of prune_csr2csr; see the #[doc] text below.
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_prune_csr2csr_nnz computes the number of nonzero elements per row and the total\n  number of nonzero elements in a sparse CSR matrix once elements less than the threshold are\n  pruned from the matrix.\n\n  \\note The routine does support asynchronous execution if the pointer mode is set to device.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  threshold     pointer to the non-negative pruning threshold which can exist in either host or device memory.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[out]\n  nnz_total_dev_host_ptr total number of nonzero elements in device or host memory.\n  @param[out]\n  temp_buffer   buffer allocated by the user whose size is determined by calling \\p rocsparse_xprune_csr2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p nnz_A is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p threshold or \\p csr_descr_A or \\p csr_descr_C or \\p csr_val_A\n              or \\p csr_row_ptr_A or \\p csr_col_ind_A or \\p csr_row_ptr_C or \\p nnz_total_dev_host_ptr\n              or \\p temp_buffer pointer is invalid.\n\n/\n/**@{"]
+    pub fn rocsparse_sprune_csr2csr_nnz(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz_A: rocsparse_int,
+        csr_descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f32,
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        threshold: *const f32, // a pointer, not a value: documented as living in host or device memory
+        csr_descr_C: rocsparse_mat_descr,
+        csr_row_ptr_C: *mut rocsparse_int, // out: m+1 row offsets of the pruned matrix C
+        nnz_total_dev_host_ptr: *mut rocsparse_int, // out: total nonzero count, in device or host memory
+        temp_buffer: *mut ::std::os::raw::c_void, // sized via rocsparse_xprune_csr2csr_buffer_size()
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: f64 counterpart of rocsparse_sprune_csr2csr_nnz (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_dprune_csr2csr_nnz(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz_A: rocsparse_int,
+        csr_descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f64, // element type differs from the f32 variant
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        threshold: *const f64, // threshold type follows the element type
+        csr_descr_C: rocsparse_mat_descr,
+        csr_row_ptr_C: *mut rocsparse_int,
+        nnz_total_dev_host_ptr: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  This function converts the sparse CSR matrix A into a sparse CSR matrix C by pruning values in A\n  that are less than the threshold. All the parameters are assumed to have been pre-allocated by the user.\n  The user first calls rocsparse_xprune_csr2csr_buffer_size() to determine the size of the buffer used\n  by rocsparse_xprune_csr2csr_nnz() and rocsparse_xprune_csr2csr() which the user then allocates. The user then\n  allocates \\p csr_row_ptr_C to have \\p m+1 elements and then calls rocsparse_xprune_csr2csr_nnz() which fills\n  in the \\p csr_row_ptr_C array and stores the number of elements that are larger than the pruning threshold\n  in \\p nnz_total_dev_host_ptr. The user then calls rocsparse_xprune_csr2csr() to complete the conversion.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  threshold     pointer to the non-negative pruning threshold which can exist in either host or device memory.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val_C     array of \\p nnz_C elements containing the values of the sparse CSR matrix C.\n  @param[in]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[out]\n  csr_col_ind_C array of \\p nnz_C elements containing the column indices of the sparse CSR matrix C.\n  @param[in]\n  temp_buffer   buffer allocated by the user whose size is determined by calling \\p rocsparse_xprune_csr2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p nnz_A is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p threshold or \\p csr_descr_A or \\p csr_descr_C or \\p csr_val_A\n              or \\p csr_row_ptr_A or \\p csr_col_ind_A or \\p csr_val_C or \\p csr_row_ptr_C or \\p csr_col_ind_C\n              or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sprune_csr2csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz_A: rocsparse_int,
+        csr_descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f32,
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        threshold: *const f32,
+        csr_descr_C: rocsparse_mat_descr,
+        csr_val_C: *mut f32,
+        csr_row_ptr_C: *const rocsparse_int,
+        csr_col_ind_C: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: f64 counterpart of rocsparse_sprune_csr2csr (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_dprune_csr2csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz_A: rocsparse_int,
+        csr_descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f64, // element type differs from the f32 variant
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        threshold: *const f64, // threshold type follows the element type
+        csr_descr_C: rocsparse_mat_descr,
+        csr_val_C: *mut f64, // element type differs from the f32 variant
+        csr_row_ptr_C: *const rocsparse_int, // const here, while csr_val_C and csr_col_ind_C are mutable
+        csr_col_ind_C: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune by percentage a sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_prune_csr2csr_by_percentage_buffer_size returns the size of the temporary buffer that\n  is required by \\p rocsparse_sprune_csr2csr_nnz_by_percentage, \\p rocsparse_dprune_csr2csr_nnz_by_percentage,\n  \\p rocsparse_sprune_csr2csr_by_percentage, and \\p rocsparse_dprune_csr2csr_by_percentage. The temporary storage\n  buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  percentage     percentage >= 0 and percentage <= 100.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_C     array of \\p nnz_C elements containing the values of the sparse CSR matrix C.\n  @param[in]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[in]\n  csr_col_ind_C array of \\p nnz_C elements containing the column indices of the sparse CSR matrix C.\n  @param[in]\n  info          prune info structure.\n  @param[out]\n  buffer_size   number of bytes of the temporary storage buffer required by \\p rocsparse_sprune_csr2csr_nnz_by_percentage,\n                \\p rocsparse_dprune_csr2csr_nnz_by_percentage, \\p rocsparse_sprune_csr2csr_by_percentage,\n                and \\p rocsparse_dprune_csr2csr_by_percentage.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n/\n/**@{"]
+    pub fn rocsparse_sprune_csr2csr_by_percentage_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz_A: rocsparse_int,
+        csr_descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f32,
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        percentage: f32,
+        csr_descr_C: rocsparse_mat_descr,
+        csr_val_C: *const f32,
+        csr_row_ptr_C: *const rocsparse_int,
+        csr_col_ind_C: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration: f64 counterpart of rocsparse_sprune_csr2csr_by_percentage_buffer_size (same parameter list; presumably covered by that declaration's Doxygen group).
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    pub fn rocsparse_dprune_csr2csr_by_percentage_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz_A: rocsparse_int,
+        csr_descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f64, // element type differs from the f32 variant
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        percentage: f64, // passed by value; type follows the element type
+        csr_descr_C: rocsparse_mat_descr,
+        csr_val_C: *const f64, // element type differs from the f32 variant
+        csr_row_ptr_C: *const rocsparse_int,
+        csr_col_ind_C: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // FFI declaration of the f32 nnz-counting step of prune_csr2csr_by_percentage; see the #[doc] text below.
+    #[must_use] // warn when a caller ignores the returned rocsparse_status
+    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune by percentage a sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_prune_csr2csr_nnz_by_percentage computes the number of nonzero elements per row and the total\n  number of nonzero elements in a sparse CSR matrix once elements less than the threshold are\n  pruned from the matrix.\n\n  \\note The routine does support asynchronous execution if the pointer mode is set to device.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  percentage    percentage >= 0 and percentage <= 100.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[out]\n  nnz_total_dev_host_ptr total number of nonzero elements in device or host memory.\n  @param[in]\n  info          prune info structure.\n  @param[out]\n  temp_buffer   buffer allocated by the user whose size is determined by calling\n                \\p rocsparse_xprune_csr2csr_by_percentage_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p nnz_A or \\p percentage is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_descr_A or \\p csr_descr_C or \\p info or \\p csr_val_A\n              or \\p csr_row_ptr_A or \\p csr_col_ind_A or \\p csr_row_ptr_C or \\p nnz_total_dev_host_ptr\n              or \\p temp_buffer pointer is invalid.\n\n/\n/**@{"]
+    pub fn rocsparse_sprune_csr2csr_nnz_by_percentage(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz_A: rocsparse_int,
+        csr_descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f32,
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        percentage: f32, // passed by value; NOTE(review): the details text above still says "threshold" for this variant
+        csr_descr_C: rocsparse_mat_descr,
+        csr_row_ptr_C: *mut rocsparse_int, // out: m+1 row offsets of the pruned matrix C
+        nnz_total_dev_host_ptr: *mut rocsparse_int, // out: total nonzero count, in device or host memory
+        info: rocsparse_mat_info, // documented only as "prune info structure"
+        temp_buffer: *mut ::std::os::raw::c_void, // sized via rocsparse_xprune_csr2csr_by_percentage_buffer_size()
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 variant of rocsparse_sprune_csr2csr_nnz_by_percentage; the signatures differ only in f64 vs f32 for csr_val_A and percentage
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_dprune_csr2csr_nnz_by_percentage(
+        handle: rocsparse_handle, // rocsparse library context (parameter notes follow the doc attached to the f32 sibling)
+        m: rocsparse_int, // rows of the CSR matrix
+        n: rocsparse_int, // columns of the CSR matrix
+        nnz_A: rocsparse_int, // non-zeros in A
+        csr_descr_A: rocsparse_mat_descr, // descriptor of A
+        csr_val_A: *const f64, // nnz_A values of A (read-only)
+        csr_row_ptr_A: *const rocsparse_int, // m+1 row offsets of A
+        csr_col_ind_A: *const rocsparse_int, // nnz_A column indices of A
+        percentage: f64, // documented range: 0 <= percentage <= 100
+        csr_descr_C: rocsparse_mat_descr, // descriptor of C
+        csr_row_ptr_C: *mut rocsparse_int, // output: m+1 row offsets of C
+        nnz_total_dev_host_ptr: *mut rocsparse_int, // output: total non-zeros of C; documented as device or host memory
+        info: rocsparse_mat_info, // prune info structure
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch; the sibling doc sizes it via rocsparse_xprune_csr2csr_by_percentage_buffer_size()
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune by percentage a sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  This function converts the sparse CSR matrix A into a sparse CSR matrix C by pruning values in A\n  that are less than the threshold. All the parameters are assumed to have been pre-allocated by the user.\n  The user first calls rocsparse_xprune_csr2csr_by_percentage_buffer_size() to determine the size of the buffer used\n  by rocsparse_xprune_csr2csr_nnz_by_percentage() and rocsparse_xprune_csr2csr_by_percentage() which the user then allocates. The user then\n  allocates \\p csr_row_ptr_C to have \\p m+1 elements and then calls rocsparse_xprune_csr2csr_nnz_by_percentage() which fills\n  in the \\p csr_row_ptr_C array and stores the number of elements that are larger than the pruning threshold\n  in \\p nnz_total_dev_host_ptr. The user then calls rocsparse_xprune_csr2csr_by_percentage() to complete the conversion.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  percentage    percentage >= 0 and percentage <= 100.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val_C     array of \\p nnz_C elements containing the values of the sparse CSR matrix C.\n  @param[in]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[out]\n  csr_col_ind_C array of \\p nnz_C elements containing the column indices of the sparse CSR matrix C.\n  @param[in]\n  info          prune info structure.\n  @param[in]\n  temp_buffer   buffer allocated by the user whose size is determined by calling \\p rocsparse_xprune_csr2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p nnz_A or \\p percentage is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_descr_A or \\p csr_descr_C or \\p info or \\p csr_val_A\n              or \\p csr_row_ptr_A or \\p csr_col_ind_A or \\p csr_val_C or \\p csr_row_ptr_C or \\p csr_col_ind_C\n              or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sprune_csr2csr_by_percentage(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz_A: rocsparse_int,
+        csr_descr_A: rocsparse_mat_descr,
+        csr_val_A: *const f32,
+        csr_row_ptr_A: *const rocsparse_int,
+        csr_col_ind_A: *const rocsparse_int,
+        percentage: f32,
+        csr_descr_C: rocsparse_mat_descr,
+        csr_val_C: *mut f32,
+        csr_row_ptr_C: *const rocsparse_int,
+        csr_col_ind_C: *mut rocsparse_int,
+        info: rocsparse_mat_info,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 variant of rocsparse_sprune_csr2csr_by_percentage; the signatures differ only in f64 vs f32 for csr_val_A, percentage and csr_val_C
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_dprune_csr2csr_by_percentage(
+        handle: rocsparse_handle, // rocsparse library context (parameter notes follow the doc attached to the f32 sibling)
+        m: rocsparse_int, // rows of the CSR matrix
+        n: rocsparse_int, // columns of the CSR matrix
+        nnz_A: rocsparse_int, // non-zeros in A
+        csr_descr_A: rocsparse_mat_descr, // descriptor of A
+        csr_val_A: *const f64, // nnz_A values of A (read-only)
+        csr_row_ptr_A: *const rocsparse_int, // m+1 row offsets of A
+        csr_col_ind_A: *const rocsparse_int, // nnz_A column indices of A
+        percentage: f64, // documented range: 0 <= percentage <= 100
+        csr_descr_C: rocsparse_mat_descr, // descriptor of C
+        csr_val_C: *mut f64, // output: nnz_C values of C
+        csr_row_ptr_C: *const rocsparse_int, // input here (const): m+1 row offsets of C, expected to be filled beforehand
+        csr_col_ind_C: *mut rocsparse_int, // output: nnz_C column indices of C
+        info: rocsparse_mat_info, // prune info structure
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch; presumably sized by the matching *_by_percentage_buffer_size call - TODO confirm
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the size of the user allocated temporary storage buffer used when converting and pruning\n  a dense matrix to a CSR matrix.\n\n  \\details\n  \\p rocsparse_prune_dense2csr_buffer_size returns the size of the temporary storage buffer\n  that is required by rocsparse_sprune_dense2csr_nnz(), rocsparse_dprune_dense2csr_nnz(),\n  rocsparse_sprune_dense2csr(), and rocsparse_dprune_dense2csr(). The temporary\n  storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  threshold   pointer to the pruning non-negative threshold which can exist in either host or device memory.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  csr_val\n              array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[in]\n  csr_row_ptr\n              integer array of \\p m+1 elements that contains the start of every row and the end of the last row plus one.\n  @param[in]\n  csr_col_ind\n              integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n          
    rocsparse_sprune_dense2csr_nnz(), rocsparse_dprune_dense2csr_nnz(),\n              rocsparse_sprune_dense2csr() and rocsparse_dprune_dense2csr().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    pub fn rocsparse_sprune_dense2csr_buffer_size(
         handle: rocsparse_handle,
-        trans: rocsparse_operation,
         m: rocsparse_int,
         n: rocsparse_int,
-        alpha: *const f32,
         A: *const f32,
         lda: rocsparse_int,
-        nnz: rocsparse_int,
-        x_val: *const f32,
-        x_ind: *const rocsparse_int,
-        beta: *const f32,
-        y: *mut f32,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
+        threshold: *const f32,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        buffer_size: *mut usize,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dgemvi(
+    pub fn rocsparse_dprune_dense2csr_buffer_size(
         handle: rocsparse_handle,
-        trans: rocsparse_operation,
         m: rocsparse_int,
         n: rocsparse_int,
-        alpha: *const f64,
         A: *const f64,
         lda: rocsparse_int,
-        nnz: rocsparse_int,
-        x_val: *const f64,
-        x_ind: *const rocsparse_int,
-        beta: *const f64,
-        y: *mut f64,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgemvi(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        A: *const rocsparse_float_complex,
-        lda: rocsparse_int,
-        nnz: rocsparse_int,
-        x_val: *const rocsparse_float_complex,
-        x_ind: *const rocsparse_int,
-        beta: *const rocsparse_float_complex,
-        y: *mut rocsparse_float_complex,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgemvi(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        A: *const rocsparse_double_complex,
-        lda: rocsparse_int,
-        nnz: rocsparse_int,
-        x_val: *const rocsparse_double_complex,
-        x_ind: *const rocsparse_int,
-        beta: *const rocsparse_double_complex,
-        y: *mut rocsparse_double_complex,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse matrix dense matrix multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmm multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$mb \\times kb\\f$\n  matrix \\f$A\\f$, defined in BSR storage format, and the dense \\f$k \\times n\\f$\n  matrix \\f$B\\f$ (where \\f$k = block\\_dim \\times kb\\f$) and adds the result to the dense\n  \\f$m \\times n\\f$ matrix \\f$C\\f$ (where \\f$m = block\\_dim \\times mb\\f$) that\n  is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot C,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans_A == \\ref rocsparse_operation_none is supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         the storage format of the blocks. Can be \\ref rocsparse_direction_row or \\ref rocsparse_direction_column.\n  @param[in]\n  trans_A     matrix \\f$A\\f$ operation type. Currently, only \\ref rocsparse_operation_none is supported.\n  @param[in]\n  trans_B     matrix \\f$B\\f$ operation type. 
Currently, only \\ref rocsparse_operation_none and rocsparse_operation_transpose\n              are supported.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  n           number of columns of the dense matrix \\f$op(B)\\f$ and \\f$C\\f$.\n  @param[in]\n  kb          number of block columns of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix \\f$A\\f$. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb*block_dim*block_dim elements of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix \\f$A\\f$.\n  @param[in]\n  block_dim   size of the blocks in the sparse BSR matrix.\n  @param[in]\n  B           array of dimension \\f$ldb \\times n\\f$ (\\f$op(B) == B\\f$),\n              \\f$ldb \\times k\\f$ otherwise.\n  @param[in]\n  ldb         leading dimension of \\f$B\\f$, must be at least \\f$\\max{(1, k)}\\f$ (\\f$ op(B) == B\\f$) where \\f$k = block\\_dim \\times kb\\f$,\n  \\f$\\max{(1, n)}\\f$ otherwise.\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  C           array of dimension \\f$ldc \\times n\\f$.\n  @param[in]\n  ldc         leading dimension of \\f$C\\f$, must be at least \\f$\\max{(1, m)}\\f$ (\\f$ op(A) == A\\f$) where \\f$m = block\\_dim \\times mb\\f$,\n  \\f$\\max{(1, k)}\\f$ where \\f$k = block\\_dim \\times kb\\f$ otherwise.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle 
the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p n, \\p kb, \\p nnzb, \\p ldb or \\p ldc\n              is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p B, \\p beta or \\p C pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A != \\ref rocsparse_operation_none or\n              \\p trans_B == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example multiplies a BSR matrix with a dense matrix.\n  \\code{.c}\n      //     1 2 0 3 0 0\n      // A = 0 4 5 0 0 0\n      //     0 0 0 7 8 0\n      //     0 0 1 2 4 1\n\n      rocsparse_int block_dim = 2;\n      rocsparse_int mb   = 2;\n      rocsparse_int kb   = 3;\n      rocsparse_int nnzb = 4;\n      rocsparse_direction dir = rocsparse_direction_row;\n\n      bsr_row_ptr[mb+1]                 = {0, 2, 4};                                        // device memory\n      bsr_col_ind[nnzb]                 = {0, 1, 1, 2};                                     // device memory\n      bsr_val[nnzb*block_dim*block_dim] = {1, 2, 0, 4, 0, 3, 5, 0, 0, 7, 1, 2, 8, 0, 4, 1}; // device memory\n\n      // Set dimension n of B\n      rocsparse_int n = 64;\n      rocsparse_int m = mb * block_dim;\n      rocsparse_int k = kb * block_dim;\n\n      // Allocate and generate dense matrix B\n      std::vector<float> hB(k * n);\n      for(rocsparse_int i = 0; i < k * n; ++i)\n      {\n          hB[i] = static_cast<float>(rand()) / RAND_MAX;\n      }\n\n      // Copy B to the device\n      float* B;\n      hipMalloc((void**)&B, sizeof(float) * k * n);\n      hipMemcpy(B, hB.data(), sizeof(float) * k * n, hipMemcpyHostToDevice);\n\n      // alpha and beta\n      float alpha = 
1.0f;\n      float beta  = 0.0f;\n\n      // Allocate memory for the resulting matrix C\n      float* C;\n      hipMalloc((void**)&C, sizeof(float) * m * n);\n\n      // Perform the matrix multiplication\n      rocsparse_sbsrmm(handle,\n                       dir,\n                       rocsparse_operation_none,\n                       rocsparse_operation_none,\n                       mb,\n                       n,\n                       kb,\n                       nnzb,\n                       &alpha,\n                       descr,\n                       bsr_val,\n                       bsr_row_ptr,\n                       bsr_col_ind,\n                       block_dim,\n                       B,\n                       k,\n                       &beta,\n                       C,\n                       m);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_sbsrmm(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        mb: rocsparse_int,
-        n: rocsparse_int,
-        kb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        B: *const f32,
-        ldb: rocsparse_int,
-        beta: *const f32,
-        C: *mut f32,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrmm(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        mb: rocsparse_int,
-        n: rocsparse_int,
-        kb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        B: *const f64,
-        ldb: rocsparse_int,
-        beta: *const f64,
-        C: *mut f64,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrmm(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        mb: rocsparse_int,
-        n: rocsparse_int,
-        kb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        B: *const rocsparse_float_complex,
-        ldb: rocsparse_int,
-        beta: *const rocsparse_float_complex,
-        C: *mut rocsparse_float_complex,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrmm(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        mb: rocsparse_int,
-        n: rocsparse_int,
-        kb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        B: *const rocsparse_double_complex,
-        ldb: rocsparse_int,
-        beta: *const rocsparse_double_complex,
-        C: *mut rocsparse_double_complex,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse matrix dense matrix multiplication using GEneral BSR storage format\n\n  \\details\n  \\p rocsparse_gebsrmm multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$mb \\times kb\\f$\n  matrix \\f$A\\f$, defined in GEneral BSR storage format, and the dense \\f$k \\times n\\f$\n  matrix \\f$B\\f$ (where \\f$k = col_block\\_dim \\times kb\\f$) and adds the result to the dense\n  \\f$m \\times n\\f$ matrix \\f$C\\f$ (where \\f$m = row_block\\_dim \\times mb\\f$) that\n  is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot C,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans_A == \\ref rocsparse_operation_none is supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         the storage format of the blocks. Can be \\ref rocsparse_direction_row or \\ref rocsparse_direction_column.\n  @param[in]\n  trans_A     matrix \\f$A\\f$ operation type. Currently, only \\ref rocsparse_operation_none is supported.\n  @param[in]\n  trans_B     matrix \\f$B\\f$ operation type. 
Currently, only \\ref rocsparse_operation_none and rocsparse_operation_transpose\n              are supported.\n  @param[in]\n  mb          number of block rows of the sparse GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  n           number of columns of the dense matrix \\f$op(B)\\f$ and \\f$C\\f$.\n  @param[in]\n  kb          number of block columns of the sparse GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse GEneral BSR matrix \\f$A\\f$. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb*row_block_dim*col_block_dim elements of the sparse GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  row_block_dim   row size of the blocks in the sparse GEneral BSR matrix.\n  @param[in]\n  col_block_dim   column size of the blocks in the sparse GEneral BSR matrix.\n  @param[in]\n  B           array of dimension \\f$ldb \\times n\\f$ (\\f$op(B) == B\\f$),\n              \\f$ldb \\times k\\f$ otherwise.\n  @param[in]\n  ldb         leading dimension of \\f$B\\f$, must be at least \\f$\\max{(1, k)}\\f$ (\\f$ op(B) == B\\f$) where \\f$k = col\\_block\\_dim \\times kb\\f$,\n  \\f$\\max{(1, n)}\\f$ otherwise.\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  C           array of dimension \\f$ldc \\times n\\f$.\n  @param[in]\n  ldc         leading dimension of \\f$C\\f$, must be at least \\f$\\max{(1, m)}\\f$ (\\f$ op(A) == A\\f$) where \\f$m = row\\_block\\_dim \\times mb\\f$,\n  \\f$\\max{(1, 
k)}\\f$ where \\f$k = col\\_block\\_dim \\times kb\\f$ otherwise.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p n, \\p kb, \\p nnzb, \\p ldb, \\p ldc, \\p row_block_dim\n              or \\p col_block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p B, \\p beta or \\p C pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A != \\ref rocsparse_operation_none or\n              \\p trans_B == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example multiplies a GEneral BSR matrix with a dense matrix.\n  \\code{.c}\n      //     1 2 0 3 0 0\n      // A = 0 4 5 0 0 0\n      //     0 0 0 7 8 0\n      //     0 0 1 2 4 1\n\n      rocsparse_int row_block_dim = 2;\n      rocsparse_int col_block_dim = 3;\n      rocsparse_int mb   = 2;\n      rocsparse_int kb   = 2;\n      rocsparse_int nnzb = 4;\n      rocsparse_direction dir = rocsparse_direction_row;\n\n      bsr_row_ptr[mb+1]                 = {0, 2, 4};                                        // device memory\n      bsr_col_ind[nnzb]                 = {0, 1, 0, 1};                                     // device memory\n      bsr_val[nnzb*row_block_dim*col_block_dim] = {1, 2, 0, 0, 4, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 7, 8, 0, 2, 4, 1}; // device memory\n\n      // Set dimension n of B\n      rocsparse_int n = 64;\n      rocsparse_int m = mb * row_block_dim;\n      rocsparse_int k = kb * col_block_dim;\n\n      // Allocate and generate dense matrix B\n      std::vector<float> hB(k * n);\n      for(rocsparse_int i = 
0; i < k * n; ++i)\n      {\n          hB[i] = static_cast<float>(rand()) / RAND_MAX;\n      }\n\n      // Copy B to the device\n      float* B;\n      hipMalloc((void**)&B, sizeof(float) * k * n);\n      hipMemcpy(B, hB.data(), sizeof(float) * k * n, hipMemcpyHostToDevice);\n\n      // alpha and beta\n      float alpha = 1.0f;\n      float beta  = 0.0f;\n\n      // Allocate memory for the resulting matrix C\n      float* C;\n      hipMalloc((void**)&C, sizeof(float) * m * n);\n\n      // Perform the matrix multiplication\n      rocsparse_sgebsrmm(handle,\n                         dir,\n                         rocsparse_operation_none,\n                         rocsparse_operation_none,\n                         mb,\n                         n,\n                         kb,\n                         nnzb,\n                         &alpha,\n                         descr,\n                         bsr_val,\n                         bsr_row_ptr,\n                         bsr_col_ind,\n                         row_block_dim,\n                         col_block_dim,\n                         B,\n                         k,\n                         &beta,\n                         C,\n                         m);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_sgebsrmm(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        mb: rocsparse_int,
-        n: rocsparse_int,
-        kb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        B: *const f32,
-        ldb: rocsparse_int,
-        beta: *const f32,
-        C: *mut f32,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dgebsrmm(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        mb: rocsparse_int,
-        n: rocsparse_int,
-        kb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        B: *const f64,
-        ldb: rocsparse_int,
-        beta: *const f64,
-        C: *mut f64,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgebsrmm(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        mb: rocsparse_int,
-        n: rocsparse_int,
-        kb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        B: *const rocsparse_float_complex,
-        ldb: rocsparse_int,
-        beta: *const rocsparse_float_complex,
-        C: *mut rocsparse_float_complex,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgebsrmm(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        mb: rocsparse_int,
-        n: rocsparse_int,
-        kb: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        B: *const rocsparse_double_complex,
-        ldb: rocsparse_int,
-        beta: *const rocsparse_double_complex,
-        C: *mut rocsparse_double_complex,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse matrix dense matrix multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrmm multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times k\\f$\n  matrix \\f$A\\f$, defined in CSR storage format, and the dense \\f$k \\times n\\f$\n  matrix \\f$B\\f$ and adds the result to the dense \\f$m \\times n\\f$ matrix \\f$C\\f$ that\n  is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot C,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < ldc; ++i)\n      {\n          for(j = 0; j < n; ++j)\n          {\n              C[i][j] = beta * C[i][j];\n\n              for(k = csr_row_ptr[i]; k < csr_row_ptr[i + 1]; ++k)\n              {\n                  C[i][j] += alpha * csr_val[k] * B[csr_col_ind[k]][j];\n              }\n          }\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans_A     matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B     matrix \\f$B\\f$ operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix \\f$A\\f$.\n  
@param[in]\n  n           number of columns of the dense matrix \\f$op(B)\\f$ and \\f$C\\f$.\n  @param[in]\n  k           number of columns of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix \\f$A\\f$. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix \\f$A\\f$.\n  @param[in]\n  B           array of dimension \\f$ldb \\times n\\f$ (\\f$op(B) == B\\f$),\n              \\f$ldb \\times k\\f$ otherwise.\n  @param[in]\n  ldb         leading dimension of \\f$B\\f$, must be at least \\f$\\max{(1, k)}\\f$\n              (\\f$op(B) == B\\f$), \\f$\\max{(1, n)}\\f$ otherwise.\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  C           array of dimension \\f$ldc \\times n\\f$.\n  @param[in]\n  ldc         leading dimension of \\f$C\\f$, must be at least \\f$\\max{(1, m)}\\f$\n              (\\f$op(A) == A\\f$), \\f$\\max{(1, k)}\\f$ otherwise.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n, \\p k, \\p nnz, \\p ldb or \\p ldc\n              is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p B, \\p beta or \\p C pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not 
supported.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example multiplies a CSR matrix with a dense matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m   = 3;\n      rocsparse_int k   = 5;\n      rocsparse_int nnz = 8;\n\n      csr_row_ptr[m+1] = {0, 3, 5, 8};             // device memory\n      csr_col_ind[nnz] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      csr_val[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8}; // device memory\n\n      // Set dimension n of B\n      rocsparse_int n = 64;\n\n      // Allocate and generate dense matrix B\n      std::vector<float> hB(k * n);\n      for(rocsparse_int i = 0; i < k * n; ++i)\n      {\n          hB[i] = static_cast<float>(rand()) / RAND_MAX;\n      }\n\n      // Copy B to the device\n      float* B;\n      hipMalloc((void**)&B, sizeof(float) * k * n);\n      hipMemcpy(B, hB.data(), sizeof(float) * k * n, hipMemcpyHostToDevice);\n\n      // alpha and beta\n      float alpha = 1.0f;\n      float beta  = 0.0f;\n\n      // Allocate memory for the resulting matrix C\n      float* C;\n      hipMalloc((void**)&C, sizeof(float) * m * n);\n\n      // Perform the matrix multiplication\n      rocsparse_scsrmm(handle,\n                       rocsparse_operation_none,\n                       rocsparse_operation_none,\n                       m,\n                       n,\n                       k,\n                       nnz,\n                       &alpha,\n                       descr,\n                       csr_val,\n                       csr_row_ptr,\n                       csr_col_ind,\n                       B,\n                       k,\n                       &beta,\n                       C,\n                       m);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsrmm(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        k: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const f32,
-        ldb: rocsparse_int,
-        beta: *const f32,
-        C: *mut f32,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsrmm(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        k: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f64,
+        threshold: *const f64,
         descr: rocsparse_mat_descr,
         csr_val: *const f64,
         csr_row_ptr: *const rocsparse_int,
         csr_col_ind: *const rocsparse_int,
-        B: *const f64,
-        ldb: rocsparse_int,
-        beta: *const f64,
-        C: *mut f64,
-        ldc: rocsparse_int,
+        buffer_size: *mut usize,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_ccsrmm(
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the number of nonzero elements per row and the total number of nonzero elements in a dense matrix once\n  elements less than the threshold are pruned from the matrix.\n\n  \\details\n  The routine does support asynchronous execution if the pointer mode is set to device.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  threshold   pointer to the pruning non-negative threshold which can exist in either host or device memory.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A.\n\n  @param[out]\n  csr_row_ptr\n              integer array of \\p m+1 elements that contains the start of every row and the end of the last row plus one.\n  @param[out]\n  nnz_total_dev_host_ptr\n              total number of nonzero elements in device or host memory.\n\n  @param[out]\n  temp_buffer\n              buffer allocated by the user whose size is determined by calling rocsparse_xprune_dense2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p lda is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p threshold or \\p descr or \\p csr_row_ptr\n              or \\p nnz_total_dev_host_ptr or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sprune_dense2csr_nnz(
         handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
         m: rocsparse_int,
         n: rocsparse_int,
-        k: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const rocsparse_float_complex,
-        ldb: rocsparse_int,
-        beta: *const rocsparse_float_complex,
-        C: *mut rocsparse_float_complex,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsrmm(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        k: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const rocsparse_double_complex,
-        ldb: rocsparse_int,
-        beta: *const rocsparse_double_complex,
-        C: *mut rocsparse_double_complex,
-        ldc: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsm_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_scsrsm_solve(),\n  rocsparse_dcsrsm_solve(), rocsparse_ccsrsm_solve() or rocsparse_zcsrsm_solve()\n  computation. The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position,\n  using same index base as the CSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note \\p rocsparse_csrsm_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
-    pub fn rocsparse_csrsm_zero_pivot(
-        handle: rocsparse_handle,
-        info: rocsparse_mat_info,
-        position: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsm_buffer_size returns the size of the temporary storage buffer that\n  is required by rocsparse_scsrsm_analysis(), rocsparse_dcsrsm_analysis(),\n  rocsparse_ccsrsm_analysis(), rocsparse_zcsrsm_analysis(), rocsparse_scsrsm_solve(),\n  rocsparse_dcsrsm_solve(), rocsparse_ccsrsm_solve() and rocsparse_zcsrsm_solve(). The\n  temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_B     matrix B operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(B).\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix A.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix A.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix A.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix A.\n  @param[in]\n  B           array of \\p m \\f$\\times\\f$ \\p nrhs elements of the rhs matrix B.\n  @param[in]\n  ldb         leading dimension of rhs matrix B.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[out]\n  buffer_size number 
of bytes of the temporary storage buffer required by\n              rocsparse_scsrsm_analysis(), rocsparse_dcsrsm_analysis(),\n              rocsparse_ccsrsm_analysis(), rocsparse_zcsrsm_analysis(),\n              rocsparse_scsrsm_solve(), rocsparse_dcsrsm_solve(),\n              rocsparse_ccsrsm_solve() and rocsparse_zcsrsm_solve().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p nrhs or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p descr, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p B, \\p info or \\p buffer_size pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_B == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_scsrsm_buffer_size(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const f32,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        policy: rocsparse_solve_policy,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsrsm_buffer_size(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const f64,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        policy: rocsparse_solve_policy,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsrsm_buffer_size(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const rocsparse_float_complex,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        policy: rocsparse_solve_policy,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsrsm_buffer_size(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const rocsparse_double_complex,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        policy: rocsparse_solve_policy,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsm_analysis performs the analysis step for rocsparse_scsrsm_solve(),\n  rocsparse_dcsrsm_solve(), rocsparse_ccsrsm_solve() and rocsparse_zcsrsm_solve(). It\n  is expected that this function will be executed only once for a given matrix and\n  particular operation type. The analysis meta data can be cleared by\n  rocsparse_csrsm_clear().\n\n  \\p rocsparse_csrsm_analysis can share its meta data with\n  rocsparse_scsrilu0_analysis(), rocsparse_dcsrilu0_analysis(),\n  rocsparse_ccsrilu0_analysis(), rocsparse_zcsrilu0_analysis(),\n  rocsparse_scsric0_analysis(), rocsparse_dcsric0_analysis(),\n  rocsparse_ccsric0_analysis(), rocsparse_zcsric0_analysis(),\n  rocsparse_scsrsv_analysis(), rocsparse_dcsrsv_analysis(),\n  rocsparse_ccsrsv_analysis() and rocsparse_zcsrsv_analysis(). Selecting\n  \\ref rocsparse_analysis_policy_reuse policy can greatly improve computation\n  performance of meta data. However, the user need to make sure that the sparsity\n  pattern remains unchanged. 
If this cannot be assured,\n  \\ref rocsparse_analysis_policy_force has to be used.\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_B     matrix B operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(B).\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix A.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix A.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix A.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix A.\n  @param[in]\n  B           array of \\p m \\f$\\times\\f$ \\p nrhs elements of the rhs matrix B.\n  @param[in]\n  ldb         leading dimension of rhs matrix B.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  analysis    \\ref rocsparse_analysis_policy_reuse or\n              \\ref rocsparse_analysis_policy_force.\n  @param[in]\n  solve       \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p nrhs or \\p nnz is 
invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p descr, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p B, \\p info or \\p temp_buffer pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_B == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_scsrsm_analysis(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const f32,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsrsm_analysis(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const f64,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsrsm_analysis(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const rocsparse_float_complex,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsrsm_analysis(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *const rocsparse_double_complex,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsm_clear deallocates all memory that was allocated by\n  rocsparse_scsrsm_analysis(), rocsparse_dcsrsm_analysis(), rocsparse_ccsrsm_analysis()\n  or rocsparse_zcsrsm_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required for further computation, e.g. when switching to\n  another sparse matrix format. Calling \\p rocsparse_csrsm_clear is optional. All\n  allocated resources will be cleared, when the opaque \\ref rocsparse_mat_info struct\n  is destroyed using rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer holding the meta data could not\n              be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
-    pub fn rocsparse_csrsm_clear(
-        handle: rocsparse_handle,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsm_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR storage format, a dense solution matrix\n  \\f$X\\f$ and the right-hand side matrix \\f$B\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot op(X) = \\alpha \\cdot op(B),\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  ,\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(X) = \\left\\{\n    \\begin{array}{ll}\n        X,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        X^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        X^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_csrsm_solve requires a user allocated temporary buffer. Its size is\n  returned by rocsparse_scsrsm_buffer_size(), rocsparse_dcsrsm_buffer_size(),\n  rocsparse_ccsrsm_buffer_size() or rocsparse_zcsrsm_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_scsrsm_analysis(),\n  rocsparse_dcsrsm_analysis(), rocsparse_ccsrsm_analysis() or\n  rocsparse_zcsrsm_analysis(). \\p rocsparse_csrsm_solve reports the first zero pivot\n  (either numerical or structural zero). 
The zero pivot status can be checked calling\n  rocsparse_csrsm_zero_pivot(). If\n  \\ref rocsparse_diag_type == \\ref rocsparse_diag_type_unit, no zero pivot will be\n  reported, even if \\f$A_{j,j} = 0\\f$ for some \\f$j\\f$.\n\n  \\note\n  The sparse CSR matrix has to be sorted. This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans_A != \\ref rocsparse_operation_conjugate_transpose and\n  \\p trans_B != \\ref rocsparse_operation_conjugate_transpose is supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_B     matrix B operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(B).\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix A.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix A.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix A.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix A.\n  @param[inout]\n  B           array of \\p m \\f$\\times\\f$ \\p nrhs elements of the rhs matrix B.\n  @param[in]\n  ldb         leading dimension of rhs matrix B.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     
rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p nrhs or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p descr, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p B, \\p info or \\p temp_buffer pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_B == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the lower triangular \\f$m \\times m\\f$ matrix \\f$L\\f$, stored in CSR\n  storage format with unit diagonal. The following example solves \\f$L \\cdot X = B\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor\n      rocsparse_mat_descr descr;\n      rocsparse_create_mat_descr(&descr);\n      rocsparse_set_mat_fill_mode(descr, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr, rocsparse_diag_type_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size;\n      rocsparse_dcsrsm_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nrhs,\n                                   nnz,\n                                   &alpha,\n                                   descr,\n                                   csr_val,\n                       
            csr_row_ptr,\n                                   csr_col_ind,\n                                   B,\n                                   ldb,\n                                   info,\n                                   rocsparse_solve_policy_auto,\n                                   &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis step\n      rocsparse_dcsrsm_analysis(handle,\n                                rocsparse_operation_none,\n                                rocsparse_operation_none,\n                                m,\n                                nrhs,\n                                nnz,\n                                &alpha,\n                                descr,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                B,\n                                ldb,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Solve LX = B\n      rocsparse_dcsrsm_solve(handle,\n                             rocsparse_operation_none,\n                             rocsparse_operation_none,\n                             m,\n                             nrhs,\n                             nnz,\n                             &alpha,\n                             descr,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             B,\n                             ldb,\n                             info,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // No zero pivot should be found, with L having unit diagonal\n\n      // 
Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsrsm_solve(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *mut f32,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsrsm_solve(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *mut f64,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsrsm_solve(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *mut rocsparse_float_complex,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsrsm_solve(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        B: *mut rocsparse_double_complex,
-        ldb: rocsparse_int,
-        info: rocsparse_mat_info,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsm_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_sbsrsm_solve(),\n  rocsparse_dbsrsm_solve(), rocsparse_cbsrsm_solve() or rocsparse_zbsrsm_solve()\n  computation. The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position,\n  using same index base as the BSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note \\p rocsparse_bsrsm_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
-    pub fn rocsparse_bsrsm_zero_pivot(
-        handle: rocsparse_handle,
-        info: rocsparse_mat_info,
-        position: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsm_buffer_size returns the size of the temporary storage buffer that\n  is required by rocsparse_sbsrsm_analysis(), rocsparse_dbsrsm_analysis(),\n  rocsparse_cbsrsm_analysis(), rocsparse_zbsrsm_analysis(), rocsparse_sbsrsm_solve(),\n  rocsparse_dbsrsm_solve(), rocsparse_cbsrsm_solve() and rocsparse_zbsrsm_solve(). The\n  temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_X     matrix X operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(X).\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix A.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix A.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim   block dimension of the sparse BSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_sbsrsm_analysis(), rocsparse_dbsrsm_analysis(),\n              
rocsparse_cbsrsm_analysis(), rocsparse_zbsrsm_analysis(),\n              rocsparse_sbsrsm_solve(), rocsparse_dbsrsm_solve(),\n              rocsparse_cbsrsm_solve() and rocsparse_zbsrsm_solve().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nrhs, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p info or \\p buffer_size pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_X == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sbsrsm_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrsm_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrsm_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrsm_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsm_analysis performs the analysis step for rocsparse_sbsrsm_solve(),\n  rocsparse_dbsrsm_solve(), rocsparse_cbsrsm_solve() and rocsparse_zbsrsm_solve(). It\n  is expected that this function will be executed only once for a given matrix and\n  particular operation type. The analysis meta data can be cleared by\n  rocsparse_bsrsm_clear().\n\n  \\p rocsparse_bsrsm_analysis can share its meta data with\n  rocsparse_sbsrilu0_analysis(), rocsparse_dbsrilu0_analysis(),\n  rocsparse_cbsrilu0_analysis(), rocsparse_zbsrilu0_analysis(),\n  rocsparse_sbsric0_analysis(), rocsparse_dbsric0_analysis(),\n  rocsparse_cbsric0_analysis(), rocsparse_zbsric0_analysis(),\n  rocsparse_sbsrsv_analysis(), rocsparse_dbsrsv_analysis(),\n  rocsparse_cbsrsv_analysis() and rocsparse_zbsrsv_analysis(). Selecting\n  \\ref rocsparse_analysis_policy_reuse policy can greatly improve computation\n  performance of meta data. However, the user need to make sure that the sparsity\n  pattern remains unchanged. 
If this cannot be assured,\n  \\ref rocsparse_analysis_policy_force has to be used.\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_X     matrix X operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(X).\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix A.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix A.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix A.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix A.\n  @param[in]\n  bsr_col_ind array of \\p nnzb containing the block column indices of the sparse\n              BSR matrix A.\n  @param[in]\n  block_dim   block dimension of the sparse BSR matrix A.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  analysis    \\ref rocsparse_analysis_policy_reuse or\n              \\ref rocsparse_analysis_policy_force.\n  @param[in]\n  solve       \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nrhs, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p 
descr, \\p bsr_val, \\p bsr_row_ptr,\n              \\p bsr_col_ind, \\p info or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_X == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sbsrsm_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrsm_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrsm_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrsm_analysis(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        analysis: rocsparse_analysis_policy,
-        solve: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsm_clear deallocates all memory that was allocated by\n  rocsparse_sbsrsm_analysis(), rocsparse_dbsrsm_analysis(), rocsparse_cbsrsm_analysis()\n  or rocsparse_zbsrsm_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required for further computation, e.g. when switching to\n  another sparse matrix format. Calling \\p rocsparse_bsrsm_clear is optional. All\n  allocated resources will be cleared, when the opaque \\ref rocsparse_mat_info struct\n  is destroyed using rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer holding the meta data could not\n              be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
-    pub fn rocsparse_bsrsm_clear(
-        handle: rocsparse_handle,
-        info: rocsparse_mat_info,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsm_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in BSR storage format, a dense solution matrix\n  \\f$X\\f$ and the right-hand side matrix \\f$B\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot op(X) = \\alpha \\cdot op(B),\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  ,\n  \\f[\n    op(X) = \\left\\{\n    \\begin{array}{ll}\n        X,   & \\text{if trans_X == rocsparse_operation_none} \\\\\n        X^T, & \\text{if trans_X == rocsparse_operation_transpose} \\\\\n        X^H, & \\text{if trans_X == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_bsrsm_solve requires a user allocated temporary buffer. Its size is\n  returned by rocsparse_sbsrsm_buffer_size(), rocsparse_dbsrsm_buffer_size(),\n  rocsparse_cbsrsm_buffer_size() or rocsparse_zbsrsm_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_sbsrsm_analysis(),\n  rocsparse_dbsrsm_analysis(), rocsparse_cbsrsm_analysis() or\n  rocsparse_zbsrsm_analysis(). \\p rocsparse_bsrsm_solve reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be checked calling\n  rocsparse_bsrsm_zero_pivot(). 
If\n  \\ref rocsparse_diag_type == \\ref rocsparse_diag_type_unit, no zero pivot will be\n  reported, even if \\f$A_{j,j} = 0\\f$ for some \\f$j\\f$.\n\n  \\note\n  The sparse BSR matrix has to be sorted.\n\n  \\note\n  Operation type of B and X must match, if \\f$op(B)=B, op(X)=X\\f$.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans_A != \\ref rocsparse_operation_conjugate_transpose and\n  \\p trans_X != \\ref rocsparse_operation_conjugate_transpose is supported.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_X     matrix X operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(X).\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix A.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix A.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim   block dimension of the sparse BSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  B           rhs matrix B with leading dimension \\p ldb.\n  @param[in]\n  ldb         leading dimension of rhs matrix B.\n  @param[out]\n  X           solution matrix X with leading dimension \\p ldx.\n  @param[in]\n  ldx         leading 
dimension of solution matrix X.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nrhs, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p descr, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p B, \\p X \\p info or \\p temp_buffer pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_X == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
-    pub fn rocsparse_sbsrsm_solve(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f32,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        B: *const f32,
-        ldb: rocsparse_int,
-        X: *mut f32,
-        ldx: rocsparse_int,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrsm_solve(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const f64,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        B: *const f64,
-        ldb: rocsparse_int,
-        X: *mut f64,
-        ldx: rocsparse_int,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrsm_solve(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        B: *const rocsparse_float_complex,
-        ldb: rocsparse_int,
-        X: *mut rocsparse_float_complex,
-        ldx: rocsparse_int,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrsm_solve(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        trans_A: rocsparse_operation,
-        trans_X: rocsparse_operation,
-        mb: rocsparse_int,
-        nrhs: rocsparse_int,
-        nnzb: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        info: rocsparse_mat_info,
-        B: *const rocsparse_double_complex,
-        ldb: rocsparse_int,
-        X: *mut rocsparse_double_complex,
-        ldx: rocsparse_int,
-        policy: rocsparse_solve_policy,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup level3_module\n  \\brief Dense matrix sparse matrix multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_gemmi multiplies the scalar \\f$\\alpha\\f$ with a dense \\f$m \\times k\\f$\n  matrix \\f$A\\f$ and the sparse \\f$k \\times n\\f$ matrix \\f$B\\f$, defined in CSR\n  storage format and adds the result to the dense \\f$m \\times n\\f$ matrix \\f$C\\f$ that\n  is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot C\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans_A     matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B     matrix \\f$B\\f$ operation type.\n  @param[in]\n  m           number of rows of the dense matrix \\f$A\\f$.\n  @param[in]\n  n           number of columns of the sparse CSR matrix \\f$op(B)\\f$ and \\f$C\\f$.\n  @param[in]\n  k           number of columns of the dense matrix \\f$A\\f$.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  A           array of dimension 
\\f$lda \\times k\\f$ (\\f$op(A) == A\\f$) or\n              \\f$lda \\times m\\f$ (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n  @param[in]\n  lda         leading dimension of \\f$A\\f$, must be at least \\f$m\\f$\n              (\\f$op(A) == A\\f$) or \\f$k\\f$ (\\f$op(A) == A^T\\f$ or\n              \\f$op(A) == A^H\\f$).\n  @param[in]\n  descr       descriptor of the sparse CSR matrix \\f$B\\f$. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse CSR\n              matrix \\f$B\\f$.\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  C           array of dimension \\f$ldc \\times n\\f$ that holds the values of \\f$C\\f$.\n  @param[in]\n  ldc         leading dimension of \\f$C\\f$, must be at least \\f$m\\f$.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n, \\p k, \\p nnz, \\p lda or \\p ldc\n              is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p A, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p beta or \\p C pointer is invalid.\n\n  \\par Example\n  This example multiplies a dense matrix with a CSC matrix.\n  \\code{.c}\n      rocsparse_int m   = 2;\n      rocsparse_int n   = 5;\n      rocsparse_int k   = 3;\n      rocsparse_int nnz = 8;\n      rocsparse_int lda = m;\n      rocsparse_int ldc = m;\n\n      // Matrix A (m x k)\n      // (  9.0  10.0  11.0 )\n      // ( 12.0  13.0  14.0 )\n\n      // Matrix B (k x n)\n      // ( 1.0  2.0  0.0  3.0  0.0 
)\n      // ( 0.0  4.0  5.0  0.0  0.0 )\n      // ( 6.0  0.0  0.0  7.0  8.0 )\n\n      // Matrix C (m x n)\n      // ( 15.0  16.0  17.0  18.0  19.0 )\n      // ( 20.0  21.0  22.0  23.0  24.0 )\n\n      A[lda * k]           = {9.0, 12.0, 10.0, 13.0, 11.0, 14.0};      // device memory\n      csc_col_ptr_B[n + 1] = {0, 2, 4, 5, 7, 8};                       // device memory\n      csc_row_ind_B[nnz]   = {0, 0, 1, 1, 2, 3, 3, 4};                 // device memory\n      csc_val_B[nnz]       = {1.0, 6.0, 2.0, 4.0, 5.0, 3.0, 7.0, 8.0}; // device memory\n      C[ldc * n]           = {15.0, 20.0, 16.0, 21.0, 17.0, 22.0,      // device memory\n                              18.0, 23.0, 19.0, 24.0};\n\n      // alpha and beta\n      float alpha = 1.0f;\n      float beta  = 0.0f;\n\n      // Perform the matrix multiplication\n      rocsparse_sgemmi(handle,\n                       rocsparse_operation_none,\n                       rocsparse_operation_transpose,\n                       m,\n                       n,\n                       k,\n                       nnz,\n                       &alpha,\n                       A,\n                       lda,\n                       descr_B,\n                       csc_val_B,\n                       csc_col_ptr_B,\n                       csc_row_ind_B,\n                       &beta,\n                       C,\n                       ldc);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_sgemmi(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        k: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f32,
         A: *const f32,
         lda: rocsparse_int,
+        threshold: *const f32,
+        descr: rocsparse_mat_descr,
+        csr_row_ptr: *mut rocsparse_int,
+        nnz_total_dev_host_ptr: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dprune_dense2csr_nnz(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        A: *const f64,
+        lda: rocsparse_int,
+        threshold: *const f64,
+        descr: rocsparse_mat_descr,
+        csr_row_ptr: *mut rocsparse_int,
+        nnz_total_dev_host_ptr: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the matrix A in dense format into a sparse matrix in CSR format while pruning values\n  that are less than the threshold. All the parameters are assumed to have been pre-allocated by the user.\n\n  \\details\n  The user first allocates \\p csr_row_ptr to have \\p m+1 elements and then calls rocsparse_xprune_dense2csr_nnz()\n  which fills in the \\p csr_row_ptr array and stores the number of elements that are larger than the pruning threshold\n  in \\p nnz_total_dev_host_ptr. The user then allocates \\p csr_col_ind and \\p csr_val to have size \\p nnz_total_dev_host_ptr\n  and completes the conversion by calling rocsparse_xprune_dense2csr(). A temporary storage buffer is used by both\n  rocsparse_xprune_dense2csr_nnz() and rocsparse_xprune_dense2csr() and must be allocated by the user and whose size is determined\n  by rocsparse_xprune_dense2csr_buffer_size().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  threshold   pointer to the non-negative pruning threshold which can exist in either host or device memory.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[out]\n  csr_val\n              array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[in]\n  csr_row_ptr\n              integer array of \\p m+1 elements that 
contains the start of every row and the end of the last row plus one.\n  @param[out]\n  csr_col_ind\n              integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user, size is returned by\n              rocsparse_xprune_dense2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p lda is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p descr or \\p threshold or \\p csr_val\n              or \\p csr_row_ptr or \\p csr_col_ind or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sprune_dense2csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        A: *const f32,
+        lda: rocsparse_int,
+        threshold: *const f32,
+        descr: rocsparse_mat_descr,
+        csr_val: *mut f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dprune_dense2csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        A: *const f64,
+        lda: rocsparse_int,
+        threshold: *const f64,
+        descr: rocsparse_mat_descr,
+        csr_val: *mut f64,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the size of the user allocated temporary storage buffer used when converting and pruning by percentage a\n  dense matrix to a CSR matrix.\n\n  \\details\n  When converting and pruning a dense matrix A to a CSR matrix by percentage the following steps are performed. First the user\n  calls \\p rocsparse_prune_dense2csr_by_percentage_buffer_size which determines the size of the temporary storage buffer. Once\n  determined, this buffer must be allocated by the user. Next the user allocates the csr_row_ptr array to have \\p m+1 elements\n  and calls \\p rocsparse_prune_dense2csr_nnz_by_percentage. Finally the user finishes the conversion by allocating the csr_col_ind\n  and csr_val arrays (whose size is determined by the value at nnz_total_dev_host_ptr) and calling \\p rocsparse_prune_dense2csr_by_percentage.\n\n  The pruning by percentage works by first sorting the absolute values of the dense matrix \\p A. We then determine a position in this\n  sorted array by\n  \\f[\n    pos = ceil(m*n*(percentage/100)) - 1\n    pos = min(pos, m*n-1)\n    pos = max(pos, 0)\n    threshold = sorted_A[pos]\n  \\f]\n  Once we have this threshold we prune values in the dense matrix \\p A as in \\p rocsparse_prune_dense2csr.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  percentage  percentage >= 0 and percentage <= 100.\n\n  @param[in]\n  descr      the 
descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  csr_val    array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n\n  @param[in]\n  csr_row_ptr integer array of \\p m+1 elements that contains the start of every row and the end of the last row plus one.\n\n  @param[in]\n  csr_col_ind integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[in]\n  info prune information structure\n\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_sprune_dense2csr_nnz_by_percentage(), rocsparse_dprune_dense2csr_nnz_by_percentage(),\n              rocsparse_sprune_dense2csr_by_percentage() and rocsparse_dprune_dense2csr_by_percentage().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    pub fn rocsparse_sprune_dense2csr_by_percentage_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        A: *const f32,
+        lda: rocsparse_int,
+        percentage: f32,
         descr: rocsparse_mat_descr,
         csr_val: *const f32,
         csr_row_ptr: *const rocsparse_int,
         csr_col_ind: *const rocsparse_int,
-        beta: *const f32,
-        C: *mut f32,
-        ldc: rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_dgemmi(
+    pub fn rocsparse_dprune_dense2csr_by_percentage_buffer_size(
         handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
         m: rocsparse_int,
         n: rocsparse_int,
-        k: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const f64,
         A: *const f64,
         lda: rocsparse_int,
+        percentage: f64,
         descr: rocsparse_mat_descr,
         csr_val: *const f64,
         csr_row_ptr: *const rocsparse_int,
         csr_col_ind: *const rocsparse_int,
-        beta: *const f64,
-        C: *mut f64,
-        ldc: rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_cgemmi(
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the number of nonzero elements per row and the total number of nonzero elements in a dense matrix\n  when converting and pruning by percentage a dense matrix to a CSR matrix.\n\n  \\details\n  When converting and pruning a dense matrix A to a CSR matrix by percentage the following steps are performed. First the user\n  calls \\p rocsparse_prune_dense2csr_by_percentage_buffer_size which determines the size of the temporary storage buffer. Once\n  determined, this buffer must be allocated by the user. Next the user allocates the csr_row_ptr array to have \\p m+1 elements\n  and calls \\p rocsparse_prune_dense2csr_nnz_by_percentage. Finally the user finishes the conversion by allocating the csr_col_ind\n  and csr_val arrays (whose size is determined by the value at nnz_total_dev_host_ptr) and calling \\p rocsparse_prune_dense2csr_by_percentage.\n\n  The pruning by percentage works by first sorting the absolute values of the dense matrix \\p A. 
We then determine a position in this\n  sorted array by\n  \\f[\n    pos = ceil(m*n*(percentage/100)) - 1\n    pos = min(pos, m*n-1)\n    pos = max(pos, 0)\n    threshold = sorted_A[pos]\n  \\f]\n  Once we have this threshold we prune values in the dense matrix \\p A as in \\p rocsparse_prune_dense2csr.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  percentage  percentage >= 0 and percentage <= 100.\n\n  @param[in]\n  descr       the descriptor of the dense matrix \\p A.\n\n  @param[out]\n  csr_row_ptr integer array of \\p m+1 elements that contains the start of every row and the end of the last row plus one.\n\n  @param[out]\n  nnz_total_dev_host_ptr total number of nonzero elements in device or host memory.\n\n  @param[in]\n  info prune information structure\n\n  @param[out]\n  temp_buffer buffer allocated by the user whose size is determined by calling rocsparse_xprune_dense2csr_by_percentage_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p lda or \\p percentage is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p descr or \\p info or \\p csr_row_ptr\n              or \\p nnz_total_dev_host_ptr or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sprune_dense2csr_nnz_by_percentage(
         handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
         m: rocsparse_int,
         n: rocsparse_int,
-        k: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        A: *const rocsparse_float_complex,
+        A: *const f32,
         lda: rocsparse_int,
+        percentage: f32,
         descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        beta: *const rocsparse_float_complex,
-        C: *mut rocsparse_float_complex,
-        ldc: rocsparse_int,
+        csr_row_ptr: *mut rocsparse_int,
+        nnz_total_dev_host_ptr: *mut rocsparse_int,
+        info: rocsparse_mat_info,
+        temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    pub fn rocsparse_zgemmi(
+    pub fn rocsparse_dprune_dense2csr_nnz_by_percentage(
         handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
         m: rocsparse_int,
         n: rocsparse_int,
-        k: rocsparse_int,
-        nnz: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        A: *const rocsparse_double_complex,
+        A: *const f64,
         lda: rocsparse_int,
+        percentage: f64,
         descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        beta: *const rocsparse_double_complex,
-        C: *mut rocsparse_double_complex,
-        ldc: rocsparse_int,
+        csr_row_ptr: *mut rocsparse_int,
+        nnz_total_dev_host_ptr: *mut rocsparse_int,
+        info: rocsparse_mat_info,
+        temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix addition using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrgeam_nnz computes the total BSR non-zero elements and the BSR row\n  offsets, that point to the start of every row of the sparse BSR matrix, of the\n  resulting matrix C. It is assumed that \\p bsr_row_ptr_C has been allocated with\n  size \\p mb+1.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  dir             direction that specifies whether to count nonzero elements by \\ref rocsparse_direction_row or by\n                  \\ref rocsparse_direction_row in the BSR matrices \\f$A\\f$, \\f$B\\f$, and \\f$C\\f$.\n  @param[in]\n  mb              number of block rows in the sparse BSR matrix \\f$op(A)\\f$ and \\f$C\\f$.\n  @param[in]\n  nb              number of block columns of the sparse BSR matrix \\f$op(B)\\f$ and\n                  \\f$C\\f$.\n  @param[in]\n  block_dim       the block dimension of the BSR matrix \\f$A\\f$. Between 1 and m where \\p m=mb*block_dim.\n  @param[in]\n  descr_A         descriptor of the sparse BSR matrix \\f$A\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_A          number of non-zero block entries of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_row_ptr_A   array of \\p mb+1 elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_col_ind_A   array of \\p nnzb_A elements containing the column indices of the\n                  sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse BSR matrix \\f$B\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_B          number of non-zero block entries of the sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  bsr_row_ptr_B   array of \\p mb+1 elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  bsr_col_ind_B   array of \\p nnzb_B elements containing the block column indices of the\n                  sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  descr_C         descriptor of the sparse BSR matrix \\f$C\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_row_ptr_C   array of \\p mb+1 elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$C\\f$.\n  @param[out]\n  nnzb_C          pointer to the number of non-zero block entries of the sparse BSR\n                  matrix \\f$C\\f$. \\p nnzb_C can be a host or device pointer.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p mb, \\p nb, \\p kb, \\p nnzb_A or \\p nnzb_B is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p descr_A, \\p bsr_row_ptr_A,\n          \\p bsr_col_ind_A, \\p descr_B, \\p bsr_row_ptr_B, \\p bsr_col_ind_B,\n          \\p descr_C, \\p bsr_row_ptr_C or \\p nnzb_C is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
+    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the matrix A in dense format into a sparse matrix in CSR format while pruning values\n  based on percentage.\n\n  \\details\n  When converting and pruning a dense matrix A to a CSR matrix by percentage the following steps are performed. First the user\n  calls \\p rocsparse_prune_dense2csr_by_percentage_buffer_size which determines the size of the temporary storage buffer. Once\n  determined, this buffer must be allocated by the user. Next the user allocates the csr_row_ptr array to have \\p m+1 elements\n  and calls \\p rocsparse_prune_dense2csr_nnz_by_percentage. Finally the user finishes the conversion by allocating the csr_col_ind\n  and csr_val arrays (whose size is determined by the value at nnz_total_dev_host_ptr) and calling \\p rocsparse_prune_dense2csr_by_percentage.\n\n  The pruning by percentage works by first sorting the absolute values of the dense matrix \\p A. We then determine a position in this\n  sorted array by\n  \\f[\n    pos = ceil(m*n*(percentage/100)) - 1\n    pos = min(pos, m*n-1)\n    pos = max(pos, 0)\n    threshold = sorted_A[pos]\n  \\f]\n  Once we have this threshold we prune values in the dense matrix \\p A as in \\p rocsparse_prune_dense2csr.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  percentage  percentage >= 0 and percentage <= 100.\n\n  @param[in]\n  descr       the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid 
value of the \\ref rocsparse_index_base.\n\n  @param[out]\n  csr_val array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n\n  @param[in]\n  csr_row_ptr integer array of \\p m+1 elements that contains the start of every row and the end of the last row plus one.\n\n  @param[out]\n  csr_col_ind integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[in]\n  info prune information structure\n\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user, size is returned by\n              rocsparse_xprune_dense2csr_by_percentage_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p lda or \\p percentage is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p descr or \\p info or \\p csr_val\n              or \\p csr_row_ptr or \\p csr_col_ind or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sprune_dense2csr_by_percentage(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        A: *const f32,
+        lda: rocsparse_int,
+        percentage: f32,
+        descr: rocsparse_mat_descr,
+        csr_val: *mut f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+        info: rocsparse_mat_info,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dprune_dense2csr_by_percentage(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        A: *const f64,
+        lda: rocsparse_int,
+        percentage: f64,
+        descr: rocsparse_mat_descr,
+        csr_val: *mut f64,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *mut rocsparse_int,
+        info: rocsparse_mat_info,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix addition using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrgeam_nnzb computes the total BSR non-zero elements and the BSR row\n  offsets, that point to the start of every row of the sparse BSR matrix, of the\n  resulting matrix C. It is assumed that \\p bsr_row_ptr_C has been allocated with\n  size \\p mb+1.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  dir             direction that specifies whether to count nonzero elements by \\ref rocsparse_direction_row or by\n                  \\ref rocsparse_direction_column in the BSR matrices \\f$A\\f$, \\f$B\\f$, and \\f$C\\f$.\n  @param[in]\n  mb              number of block rows in the sparse BSR matrix \\f$op(A)\\f$ and \\f$C\\f$.\n  @param[in]\n  nb              number of block columns of the sparse BSR matrix \\f$op(B)\\f$ and\n                  \\f$C\\f$.\n  @param[in]\n  block_dim       the block dimension of the BSR matrix \\f$A\\f$. Between 1 and m where \\p m=mb*block_dim.\n  @param[in]\n  descr_A         descriptor of the sparse BSR matrix \\f$A\\f$. Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_A          number of non-zero block entries of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_row_ptr_A   array of \\p mb+1 elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_col_ind_A   array of \\p nnzb_A elements containing the column indices of the\n                  sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse BSR matrix \\f$B\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_B          number of non-zero block entries of the sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  bsr_row_ptr_B   array of \\p mb+1 elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  bsr_col_ind_B   array of \\p nnzb_B elements containing the block column indices of the\n                  sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  descr_C         descriptor of the sparse BSR matrix \\f$C\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_row_ptr_C   array of \\p mb+1 elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$C\\f$.\n  @param[out]\n  nnzb_C          pointer to the number of non-zero block entries of the sparse BSR\n                  matrix \\f$C\\f$. \\p nnzb_C can be a host or device pointer.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p mb, \\p nb, \\p kb, \\p nnzb_A or \\p nnzb_B is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p descr_A, \\p bsr_row_ptr_A,\n          \\p bsr_col_ind_A, \\p descr_B, \\p bsr_row_ptr_B, \\p bsr_col_ind_B,\n          \\p descr_C, \\p bsr_row_ptr_C or \\p nnzb_C is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
     pub fn rocsparse_bsrgeam_nnzb(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
@@ -5433,124 +4385,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix addition using CSR storage format\n\n  \\details\n  \\p rocsparse_csrgeam_nnz computes the total CSR non-zero elements and the CSR row\n  offsets, that point to the start of every row of the sparse CSR matrix, of the\n  resulting matrix C. It is assumed that \\p csr_row_ptr_C has been allocated with\n  size \\p m+1.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSR matrix \\f$A\\f$, \\f$B\\f$ and \\f$C\\f$.\n  @param[in]\n  n               number of columns of the sparse CSR matrix \\f$A\\f$, \\f$B\\f$ and \\f$C\\f$.\n  @param[in]\n  descr_A         descriptor of the sparse CSR matrix \\f$A\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_A           number of non-zero entries of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_row_ptr_A   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_col_ind_A   array of \\p nnz_A elements containing the column indices of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse CSR matrix \\f$B\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_B           number of non-zero entries of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_row_ptr_B   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_col_ind_B   array of \\p nnz_B elements containing the column indices of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  descr_C         descriptor of the sparse CSR matrix \\f$C\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_row_ptr_C   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$C\\f$.\n  @param[out]\n  nnz_C           pointer to the number of non-zero entries of the sparse CSR\n                  matrix \\f$C\\f$. \\p nnz_C can be a host or device pointer.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p m, \\p n, \\p nnz_A or \\p nnz_B is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p descr_A, \\p csr_row_ptr_A,\n          \\p csr_col_ind_A, \\p descr_B, \\p csr_row_ptr_B, \\p csr_col_ind_B,\n          \\p descr_C, \\p csr_row_ptr_C or \\p nnz_C is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
-    pub fn rocsparse_csrgeam_nnz(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        nnz_A: rocsparse_int,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        descr_B: rocsparse_mat_descr,
-        nnz_B: rocsparse_int,
-        csr_row_ptr_B: *const rocsparse_int,
-        csr_col_ind_B: *const rocsparse_int,
-        descr_C: rocsparse_mat_descr,
-        csr_row_ptr_C: *mut rocsparse_int,
-        nnz_C: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix addition using CSR storage format\n\n  \\details\n  \\p rocsparse_csrgeam multiplies the scalar \\f$\\alpha\\f$ with the sparse\n  \\f$m \\times n\\f$ matrix \\f$A\\f$, defined in CSR storage format, multiplies the\n  scalar \\f$\\beta\\f$ with the sparse \\f$m \\times n\\f$ matrix \\f$B\\f$, defined in CSR\n  storage format, and adds both resulting matrices to obtain the sparse\n  \\f$m \\times n\\f$ matrix \\f$C\\f$, defined in CSR storage format, such that\n  \\f[\n    C := \\alpha \\cdot A + \\beta \\cdot B.\n  \\f]\n\n  It is assumed that \\p csr_row_ptr_C has already been filled and that \\p csr_val_C and\n  \\p csr_col_ind_C are allocated by the user. \\p csr_row_ptr_C and allocation size of\n  \\p csr_col_ind_C and \\p csr_val_C is defined by the number of non-zero elements of\n  the sparse CSR matrix C. Both can be obtained by rocsparse_csrgeam_nnz().\n\n  \\note Both scalars \\f$\\alpha\\f$ and \\f$beta\\f$ have to be valid.\n\n  \\note Currently, only \\ref rocsparse_matrix_type_general is supported.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSR matrix \\f$A\\f$, \\f$B\\f$ and \\f$C\\f$.\n  @param[in]\n  n               number of columns of the sparse CSR matrix \\f$A\\f$, \\f$B\\f$ and \\f$C\\f$.\n  @param[in]\n  alpha           scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr_A         descriptor of the sparse CSR matrix \\f$A\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_A           number of non-zero entries of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_val_A       array of \\p nnz_A elements of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_row_ptr_A   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_col_ind_A   array of \\p nnz_A elements containing the column indices of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  beta            scalar \\f$\\beta\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse CSR matrix \\f$B\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_B           number of non-zero entries of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_val_B       array of \\p nnz_B elements of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_row_ptr_B   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_col_ind_B   array of \\p nnz_B elements containing the column indices of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  descr_C         descriptor of the sparse CSR matrix \\f$C\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val_C       array of elements of the sparse CSR matrix \\f$C\\f$.\n  @param[in]\n  csr_row_ptr_C   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$C\\f$.\n  @param[out]\n  csr_col_ind_C   array of elements containing the column indices of the\n                  sparse CSR matrix \\f$C\\f$.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p m, \\p n, \\p nnz_A or \\p nnz_B is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p alpha, \\p descr_A, \\p csr_val_A,\n          \\p csr_row_ptr_A, \\p csr_col_ind_A, \\p beta, \\p descr_B, \\p csr_val_B,\n          \\p csr_row_ptr_B, \\p csr_col_ind_B, \\p descr_C, \\p csr_val_C,\n          \\p csr_row_ptr_C or \\p csr_col_ind_C is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example adds two CSR matrices.\n  \\code{.c}\n  // Initialize scalar multipliers\n  float alpha = 1.0f;\n  float beta  = 1.0f;\n\n  // Create matrix descriptors\n  rocsparse_mat_descr descr_A;\n  rocsparse_mat_descr descr_B;\n  rocsparse_mat_descr descr_C;\n\n  rocsparse_create_mat_descr(&descr_A);\n  rocsparse_create_mat_descr(&descr_B);\n  rocsparse_create_mat_descr(&descr_C);\n\n  // Set pointer mode\n  rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host);\n\n  // Obtain number of total non-zero entries in C and row pointers of C\n  rocsparse_int nnz_C;\n  hipMalloc((void**)&csr_row_ptr_C, sizeof(rocsparse_int) * (m + 1));\n\n  rocsparse_csrgeam_nnz(handle,\n                        m,\n                        n,\n                        descr_A,\n                        nnz_A,\n                        
csr_row_ptr_A,\n                        csr_col_ind_A,\n                        descr_B,\n                        nnz_B,\n                        csr_row_ptr_B,\n                        csr_col_ind_B,\n                        descr_C,\n                        csr_row_ptr_C,\n                        &nnz_C);\n\n  // Compute column indices and values of C\n  hipMalloc((void**)&csr_col_ind_C, sizeof(rocsparse_int) * nnz_C);\n  hipMalloc((void**)&csr_val_C, sizeof(float) * nnz_C);\n\n  rocsparse_scsrgeam(handle,\n                     m,\n                     n,\n                     &alpha,\n                     descr_A,\n                     nnz_A,\n                     csr_val_A,\n                     csr_row_ptr_A,\n                     csr_col_ind_A,\n                     &beta,\n                     descr_B,\n                     nnz_B,\n                     csr_val_B,\n                     csr_row_ptr_B,\n                     csr_col_ind_B,\n                     descr_C,\n                     csr_val_C,\n                     csr_row_ptr_C,\n                     csr_col_ind_C);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsrgeam(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        alpha: *const f32,
-        descr_A: rocsparse_mat_descr,
-        nnz_A: rocsparse_int,
-        csr_val_A: *const f32,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        beta: *const f32,
-        descr_B: rocsparse_mat_descr,
-        nnz_B: rocsparse_int,
-        csr_val_B: *const f32,
-        csr_row_ptr_B: *const rocsparse_int,
-        csr_col_ind_B: *const rocsparse_int,
-        descr_C: rocsparse_mat_descr,
-        csr_val_C: *mut f32,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsrgeam(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        alpha: *const f64,
-        descr_A: rocsparse_mat_descr,
-        nnz_A: rocsparse_int,
-        csr_val_A: *const f64,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        beta: *const f64,
-        descr_B: rocsparse_mat_descr,
-        nnz_B: rocsparse_int,
-        csr_val_B: *const f64,
-        csr_row_ptr_B: *const rocsparse_int,
-        csr_col_ind_B: *const rocsparse_int,
-        descr_C: rocsparse_mat_descr,
-        csr_val_C: *mut f64,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsrgeam(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        alpha: *const rocsparse_float_complex,
-        descr_A: rocsparse_mat_descr,
-        nnz_A: rocsparse_int,
-        csr_val_A: *const rocsparse_float_complex,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        beta: *const rocsparse_float_complex,
-        descr_B: rocsparse_mat_descr,
-        nnz_B: rocsparse_int,
-        csr_val_B: *const rocsparse_float_complex,
-        csr_row_ptr_B: *const rocsparse_int,
-        csr_col_ind_B: *const rocsparse_int,
-        descr_C: rocsparse_mat_descr,
-        csr_val_C: *mut rocsparse_float_complex,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsrgeam(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        alpha: *const rocsparse_double_complex,
-        descr_A: rocsparse_mat_descr,
-        nnz_A: rocsparse_int,
-        csr_val_A: *const rocsparse_double_complex,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        beta: *const rocsparse_double_complex,
-        descr_B: rocsparse_mat_descr,
-        nnz_B: rocsparse_int,
-        csr_val_B: *const rocsparse_double_complex,
-        csr_row_ptr_B: *const rocsparse_int,
-        csr_col_ind_B: *const rocsparse_int,
-        descr_C: rocsparse_mat_descr,
-        csr_val_C: *mut rocsparse_double_complex,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrgemm_buffer_size returns the size of the temporary storage buffer\n  that is required by rocsparse_bsrgemm_nnzb(), rocsparse_sbsrgemm(),\n  rocsparse_dbsrgemm(), rocsparse_cbsrgemm() and rocsparse_zbsrgemm(). The temporary\n  storage buffer must be allocated by the user.\n\n  \\note\n  This function is blocking with respect to the host.\n  \\note\n  Currently, only \\p trans_A == \\p trans_B == \\ref rocsparse_operation_none is\n  supported.\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  dir             direction that specifies whether to count nonzero elements by \\ref rocsparse_direction_row or by\n                  \\ref rocsparse_direction_row in the BSR matrices \\f$A\\f$, \\f$B\\f$, \\f$C\\f$, and \\f$D\\f$.\n  @param[in]\n  trans_A         matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B         matrix \\f$B\\f$ operation type.\n  @param[in]\n  mb              number of block rows in the sparse BSR matrix \\f$op(A)\\f$ and \\f$C\\f$.\n  @param[in]\n  nb              number of block columns of the sparse BSR matrix \\f$op(B)\\f$ and\n                  \\f$C\\f$.\n  @param[in]\n  kb              number of block columns of the sparse BSR matrix \\f$op(A)\\f$ and number of\n                  rows of the sparse BSR matrix \\f$op(B)\\f$.\n  @param[in]\n  block_dim       the block dimension of the BSR matrix \\f$A\\f$, \\f$B\\f$, \\f$C\\f$, and \\f$D\\f$.\n  @param[in]\n  alpha           scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr_A         descriptor of the sparse BSR matrix \\f$A\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_A          number of non-zero block entries of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_row_ptr_A   array of \\p mb+1 elements (\\f$op(A) == A\\f$, \\p kb+1 otherwise)\n                  that point to the start of every block row of the sparse BSR matrix\n                  \\f$op(A)\\f$.\n  @param[in]\n  bsr_col_ind_A   array of \\p nnzb_A elements containing the block column indices of the\n                  sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse BSR matrix \\f$B\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_B          number of non-zero block entries of the sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  bsr_row_ptr_B   array of \\p kb+1 elements (\\f$op(B) == B\\f$, \\p mb+1 otherwise)\n                  that point to the start of every block row of the sparse BSR matrix\n                  \\f$op(B)\\f$.\n  @param[in]\n  bsr_col_ind_B   array of \\p nnzb_B elements containing the block column indices of the\n                  sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  beta            scalar \\f$\\beta\\f$.\n  @param[in]\n  descr_D         descriptor of the sparse BSR matrix \\f$D\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_D          number of non-zero block entries of the sparse BSR matrix \\f$D\\f$.\n  @param[in]\n  bsr_row_ptr_D   array of \\p mb+1 elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$D\\f$.\n  @param[in]\n  bsr_col_ind_D   array of \\p nnzb_D elements containing the block column indices of the sparse\n                  BSR matrix \\f$D\\f$.\n  @param[inout]\n  info_C          structure that holds meta data for the sparse BSR matrix \\f$C\\f$.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_bsrgemm_nnzb(), rocsparse_sbsrgemm(), rocsparse_dbsrgemm(),\n                  rocsparse_cbsrgemm() and rocsparse_zbsrgemm().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p mb, \\p nb, \\p kb, \\p block_dim, \\p nnzb_A, \\p nnzb_B or\n          \\p nnzb_D is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p descr_A, \\p bsr_row_ptr_A, \\p bsr_col_ind_A, \\p descr_B,\n          \\p bsr_row_ptr_B or \\p bsr_col_ind_B are invalid if \\p alpha is valid,\n          \\p descr_D, \\p bsr_row_ptr_D or \\p bsr_col_ind_D is invalid if \\p beta is\n          valid, \\p info_C or \\p buffer_size is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p trans_A != \\ref rocsparse_operation_none,\n          \\p trans_B != \\ref rocsparse_operation_none, or\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrgemm_buffer_size returns the size of the temporary storage buffer\n  that is required by rocsparse_bsrgemm_nnzb(), rocsparse_sbsrgemm(),\n  rocsparse_dbsrgemm(), rocsparse_cbsrgemm() and rocsparse_zbsrgemm(). The temporary\n  storage buffer must be allocated by the user.\n\n  \\note\n  This function is blocking with respect to the host.\n  \\note\n  Currently, only \\p trans_A == \\p trans_B == \\ref rocsparse_operation_none is\n  supported.\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  dir             direction that specifies whether to count nonzero elements by \\ref rocsparse_direction_row or by\n                  \\ref rocsparse_direction_row in the BSR matrices \\f$A\\f$, \\f$B\\f$, \\f$C\\f$, and \\f$D\\f$.\n  @param[in]\n  trans_A         matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B         matrix \\f$B\\f$ operation type.\n  @param[in]\n  mb              number of block rows in the sparse BSR matrix \\f$op(A)\\f$ and \\f$C\\f$.\n  @param[in]\n  nb              number of block columns of the sparse BSR matrix \\f$op(B)\\f$ and\n                  \\f$C\\f$.\n  @param[in]\n  kb              number of block columns of the sparse BSR matrix \\f$op(A)\\f$ and number of\n                  rows of the sparse BSR matrix \\f$op(B)\\f$.\n  @param[in]\n  block_dim       the block dimension of the BSR matrix \\f$A\\f$, \\f$B\\f$, \\f$C\\f$, and \\f$D\\f$.\n  @param[in]\n  alpha           scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr_A         descriptor of the sparse BSR matrix \\f$A\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_A          number of non-zero block entries of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_row_ptr_A   array of \\p mb+1 elements (\\f$op(A) == A\\f$, \\p kb+1 otherwise)\n                  that point to the start of every block row of the sparse BSR matrix\n                  \\f$op(A)\\f$.\n  @param[in]\n  bsr_col_ind_A   array of \\p nnzb_A elements containing the block column indices of the\n                  sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse BSR matrix \\f$B\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_B          number of non-zero block entries of the sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  bsr_row_ptr_B   array of \\p kb+1 elements (\\f$op(B) == B\\f$, \\p mb+1 otherwise)\n                  that point to the start of every block row of the sparse BSR matrix\n                  \\f$op(B)\\f$.\n  @param[in]\n  bsr_col_ind_B   array of \\p nnzb_B elements containing the block column indices of the\n                  sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  beta            scalar \\f$\\beta\\f$.\n  @param[in]\n  descr_D         descriptor of the sparse BSR matrix \\f$D\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_D          number of non-zero block entries of the sparse BSR matrix \\f$D\\f$.\n  @param[in]\n  bsr_row_ptr_D   array of \\p mb+1 elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$D\\f$.\n  @param[in]\n  bsr_col_ind_D   array of \\p nnzb_D elements containing the block column indices of the sparse\n                  BSR matrix \\f$D\\f$.\n  @param[inout]\n  info_C          structure that holds meta data for the sparse BSR matrix \\f$C\\f$.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_bsrgemm_nnzb(), rocsparse_sbsrgemm(), rocsparse_dbsrgemm(),\n                  rocsparse_cbsrgemm() and rocsparse_zbsrgemm().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p mb, \\p nb, \\p kb, \\p block_dim, \\p nnzb_A, \\p nnzb_B or\n          \\p nnzb_D is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p descr_A, \\p bsr_row_ptr_A, \\p bsr_col_ind_A, \\p descr_B,\n          \\p bsr_row_ptr_B or \\p bsr_col_ind_B are invalid if \\p alpha is valid,\n          \\p descr_D, \\p bsr_row_ptr_D or \\p bsr_col_ind_D is invalid if \\p beta is\n          valid, \\p info_C or \\p buffer_size is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p trans_A != \\ref rocsparse_operation_none,\n          \\p trans_B != \\ref rocsparse_operation_none, or\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
     pub fn rocsparse_sbsrgemm_buffer_size(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
@@ -5667,7 +4502,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrgemm_nnzb computes the total BSR non-zero block elements and the BSR block row\n  offsets, that point to the start of every block row of the sparse BSR matrix, of the\n  resulting multiplied matrix C. It is assumed that \\p bsr_row_ptr_C has been allocated\n  with size \\p mb+1.\n  The required buffer size can be obtained by rocsparse_sbsrgemm_buffer_size(),\n  rocsparse_dbsrgemm_buffer_size(), rocsparse_cbsrgemm_buffer_size() and\n  rocsparse_zbsrgemm_buffer_size(), respectively.\n\n  \\note\n  This function is blocking with respect to the host.\n  \\note\n  Currently, only \\p trans_A == \\p trans_B == \\ref rocsparse_operation_none is\n  supported.\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  dir             direction that specifies whether to count nonzero elements by \\ref rocsparse_direction_row or by\n                  \\ref rocsparse_direction_row in the BSR matrices \\f$A\\f$, \\f$B\\f$, \\f$C\\f$, and \\f$D\\f$.\n  @param[in]\n  trans_A         matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B         matrix \\f$B\\f$ operation type.\n  @param[in]\n  mb              number of block rows in the sparse BSR matrix \\f$op(A)\\f$ and \\f$C\\f$.\n  @param[in]\n  nb              number of block columns of the sparse BSR matrix \\f$op(B)\\f$ and\n                  \\f$C\\f$.\n  @param[in]\n  kb              number of block columns of the sparse BSR matrix \\f$op(A)\\f$ and number of\n                  rows of the sparse BSR matrix \\f$op(B)\\f$.\n  @param[in]\n  block_dim       the block dimension of the BSR matrix \\f$A\\f$, \\f$B\\f$, \\f$C\\f$, and \\f$D\\f$.\n  @param[in]\n  descr_A         descriptor of the sparse BSR matrix \\f$A\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_A          number of non-zero block entries of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_row_ptr_A   array of \\p mb+1 block elements (\\f$op(A) == A\\f$, \\p kb+1 otherwise)\n                  that point to the start of every row of the sparse BSR matrix\n                  \\f$op(A)\\f$.\n  @param[in]\n  bsr_col_ind_A   array of \\p nnzb_A block elements containing the block column indices of the\n                  sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse BSR matrix \\f$B\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_B          number of non-zero block entries of the sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  bsr_row_ptr_B   array of \\p kb+1 block elements (\\f$op(B) == B\\f$, \\p mb+1 otherwise)\n                  that point to the start of every block row of the sparse BSR matrix\n                  \\f$op(B)\\f$.\n  @param[in]\n  bsr_col_ind_B   array of \\p nnzb_B block elements containing the block column indices of the\n                  sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  descr_D         descriptor of the sparse BSR matrix \\f$D\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_D          number of non-zero block entries of the sparse BSR matrix \\f$D\\f$.\n  @param[in]\n  bsr_row_ptr_D   array of \\p mb+1 block elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$D\\f$.\n  @param[in]\n  bsr_col_ind_D   array of \\p nnzb_D block elements containing the block column indices of the sparse\n                  BSR matrix \\f$D\\f$.\n  @param[in]\n  descr_C         descriptor of the sparse BSR matrix \\f$C\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_row_ptr_C   array of \\p mb+1 block elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$C\\f$.\n  @param[out]\n  nnzb_C          pointer to the number of non-zero block entries of the sparse BSR\n                  matrix \\f$C\\f$.\n  @param[in]\n  info_C          structure that holds meta data for the sparse BSR matrix \\f$C\\f$.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned\n                  by rocsparse_sbsrgemm_buffer_size(),\n                  rocsparse_dbsrgemm_buffer_size(), rocsparse_cbsrgemm_buffer_size() or\n                  rocsparse_zbsrgemm_buffer_size().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p mb, \\p nb, \\p kb, \\p block_dim, \\p nnzb_A, \\p nnzb_B or\n          \\p nnzb_D is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p descr_A, \\p bsr_row_ptr_A,\n          \\p bsr_col_ind_A, \\p descr_B, \\p bsr_row_ptr_B, \\p bsr_col_ind_B,\n          \\p descr_D, \\p bsr_row_ptr_D, \\p bsr_col_ind_D, \\p descr_C,\n          \\p bsr_row_ptr_C, \\p nnzb_C, \\p info_C or \\p temp_buffer is invalid.\n  \\retval rocsparse_status_memory_error additional buffer for long rows could not be\n          allocated.\n  \\retval rocsparse_status_not_implemented\n          \\p trans_A != \\ref rocsparse_operation_none,\n          \\p trans_B != \\ref rocsparse_operation_none, or\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
+    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrgemm_nnzb computes the total BSR non-zero block elements and the BSR block row\n  offsets, that point to the start of every block row of the sparse BSR matrix, of the\n  resulting multiplied matrix C. It is assumed that \\p bsr_row_ptr_C has been allocated\n  with size \\p mb+1.\n  The required buffer size can be obtained by rocsparse_sbsrgemm_buffer_size(),\n  rocsparse_dbsrgemm_buffer_size(), rocsparse_cbsrgemm_buffer_size() and\n  rocsparse_zbsrgemm_buffer_size(), respectively.\n\n  \\note\n  This function is blocking with respect to the host.\n  \\note\n  Currently, only \\p trans_A == \\p trans_B == \\ref rocsparse_operation_none is\n  supported.\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  dir             direction that specifies whether to count nonzero elements by \\ref rocsparse_direction_row or by\n                  \\ref rocsparse_direction_row in the BSR matrices \\f$A\\f$, \\f$B\\f$, \\f$C\\f$, and \\f$D\\f$.\n  @param[in]\n  trans_A         matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B         matrix \\f$B\\f$ operation type.\n  @param[in]\n  mb              number of block rows in the sparse BSR matrix \\f$op(A)\\f$ and \\f$C\\f$.\n  @param[in]\n  nb              number of block columns of the sparse BSR matrix \\f$op(B)\\f$ and\n                  \\f$C\\f$.\n  @param[in]\n  kb              number of block columns of the sparse BSR matrix \\f$op(A)\\f$ and number of\n                  rows of the sparse BSR matrix \\f$op(B)\\f$.\n  @param[in]\n  block_dim       the block dimension of the BSR matrix \\f$A\\f$, \\f$B\\f$, \\f$C\\f$, and \\f$D\\f$.\n  @param[in]\n  descr_A         
descriptor of the sparse BSR matrix \\f$A\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_A          number of non-zero block entries of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_row_ptr_A   array of \\p mb+1 block elements (\\f$op(A) == A\\f$, \\p kb+1 otherwise)\n                  that point to the start of every row of the sparse BSR matrix\n                  \\f$op(A)\\f$.\n  @param[in]\n  bsr_col_ind_A   array of \\p nnzb_A block elements containing the block column indices of the\n                  sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse BSR matrix \\f$B\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_B          number of non-zero block entries of the sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  bsr_row_ptr_B   array of \\p kb+1 block elements (\\f$op(B) == B\\f$, \\p mb+1 otherwise)\n                  that point to the start of every block row of the sparse BSR matrix\n                  \\f$op(B)\\f$.\n  @param[in]\n  bsr_col_ind_B   array of \\p nnzb_B block elements containing the block column indices of the\n                  sparse BSR matrix \\f$B\\f$.\n  @param[in]\n  descr_D         descriptor of the sparse BSR matrix \\f$D\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnzb_D          number of non-zero block entries of the sparse BSR matrix \\f$D\\f$.\n  @param[in]\n  bsr_row_ptr_D   array of \\p mb+1 block elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$D\\f$.\n  @param[in]\n  bsr_col_ind_D   array of \\p nnzb_D block elements containing the block column indices of the sparse\n                  BSR matrix \\f$D\\f$.\n  @param[in]\n  descr_C         descriptor of the sparse BSR matrix \\f$C\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_row_ptr_C   array of \\p mb+1 block elements that point to the start of every block row of the\n                  sparse BSR matrix \\f$C\\f$.\n  @param[out]\n  nnzb_C          pointer to the number of non-zero block entries of the sparse BSR\n                  matrix \\f$C\\f$.\n  @param[in]\n  info_C          structure that holds meta data for the sparse BSR matrix \\f$C\\f$.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned\n                  by rocsparse_sbsrgemm_buffer_size(),\n                  rocsparse_dbsrgemm_buffer_size(), rocsparse_cbsrgemm_buffer_size() or\n                  rocsparse_zbsrgemm_buffer_size().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p mb, \\p nb, \\p kb, \\p block_dim, \\p nnzb_A, \\p nnzb_B or\n          \\p nnzb_D is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p descr_A, \\p bsr_row_ptr_A,\n          \\p bsr_col_ind_A, \\p descr_B, \\p bsr_row_ptr_B, \\p bsr_col_ind_B,\n          \\p descr_D, \\p bsr_row_ptr_D, \\p bsr_col_ind_D, \\p descr_C,\n          \\p bsr_row_ptr_C, \\p nnzb_C, \\p info_C or \\p temp_buffer is invalid.\n  \\retval rocsparse_status_memory_error additional buffer for long rows could not be\n          allocated.\n  \\retval rocsparse_status_not_implemented\n          \\p trans_A != \\ref rocsparse_operation_none,\n          \\p trans_B != \\ref rocsparse_operation_none, or\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
     pub fn rocsparse_bsrgemm_nnzb(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
@@ -5843,7 +4678,124 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrgemm_buffer_size returns the size of the temporary storage buffer\n  that is required by rocsparse_csrgemm_nnz(), rocsparse_scsrgemm(),\n  rocsparse_dcsrgemm(), rocsparse_ccsrgemm() and rocsparse_zcsrgemm(). The temporary\n  storage buffer must be allocated by the user.\n\n  \\note\n  Please note, that for matrix products with more than 4096 non-zero entries per row,\n  additional temporary storage buffer is allocated by the algorithm.\n  \\note\n  Please note, that for matrix products with more than 8192 intermediate products per\n  row, additional temporary storage buffer is allocated by the algorithm.\n  \\note\n  Currently, only \\p trans_A == \\p trans_B == \\ref rocsparse_operation_none is\n  supported.\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  trans_A         matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B         matrix \\f$B\\f$ operation type.\n  @param[in]\n  m               number of rows of the sparse CSR matrix \\f$op(A)\\f$ and \\f$C\\f$.\n  @param[in]\n  n               number of columns of the sparse CSR matrix \\f$op(B)\\f$ and\n                  \\f$C\\f$.\n  @param[in]\n  k               number of columns of the sparse CSR matrix \\f$op(A)\\f$ and number of\n                  rows of the sparse CSR matrix \\f$op(B)\\f$.\n  @param[in]\n  alpha           scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr_A         descriptor of the sparse CSR matrix \\f$A\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_A           number of non-zero entries of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_row_ptr_A   array of \\p m+1 elements (\\f$op(A) == A\\f$, \\p k+1 otherwise)\n                  that point to the start of every row of the sparse CSR matrix\n                  \\f$op(A)\\f$.\n  @param[in]\n  csr_col_ind_A   array of \\p nnz_A elements containing the column indices of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse CSR matrix \\f$B\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_B           number of non-zero entries of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_row_ptr_B   array of \\p k+1 elements (\\f$op(B) == B\\f$, \\p m+1 otherwise)\n                  that point to the start of every row of the sparse CSR matrix\n                  \\f$op(B)\\f$.\n  @param[in]\n  csr_col_ind_B   array of \\p nnz_B elements containing the column indices of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  beta            scalar \\f$\\beta\\f$.\n  @param[in]\n  descr_D         descriptor of the sparse CSR matrix \\f$D\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_D           number of non-zero entries of the sparse CSR matrix \\f$D\\f$.\n  @param[in]\n  csr_row_ptr_D   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$D\\f$.\n  @param[in]\n  csr_col_ind_D   array of \\p nnz_D elements containing the column indices of the sparse\n                  CSR matrix \\f$D\\f$.\n  @param[inout]\n  info_C          structure that holds meta data for the sparse CSR matrix \\f$C\\f$.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_csrgemm_nnz(), rocsparse_scsrgemm(), rocsparse_dcsrgemm(),\n                  rocsparse_ccsrgemm() and rocsparse_zcsrgemm().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p m, \\p n, \\p k, \\p nnz_A, \\p nnz_B or\n          \\p nnz_D is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p descr_A, \\p csr_row_ptr_A, \\p csr_col_ind_A, \\p descr_B,\n          \\p csr_row_ptr_B or \\p csr_col_ind_B are invalid if \\p alpha is valid,\n          \\p descr_D, \\p csr_row_ptr_D or \\p csr_col_ind_D is invalid if \\p beta is\n          valid, \\p info_C or \\p buffer_size is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p trans_A != \\ref rocsparse_operation_none,\n          \\p trans_B != \\ref rocsparse_operation_none, or\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix addition using CSR storage format\n\n  \\details\n  \\p rocsparse_csrgeam_nnz computes the total CSR non-zero elements and the CSR row\n  offsets, that point to the start of every row of the sparse CSR matrix, of the\n  resulting matrix C. It is assumed that \\p csr_row_ptr_C has been allocated with\n  size \\p m+1.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSR matrix \\f$A\\f$, \\f$B\\f$ and \\f$C\\f$.\n  @param[in]\n  n               number of columns of the sparse CSR matrix \\f$A\\f$, \\f$B\\f$ and \\f$C\\f$.\n  @param[in]\n  descr_A         descriptor of the sparse CSR matrix \\f$A\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_A           number of non-zero entries of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_row_ptr_A   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_col_ind_A   array of \\p nnz_A elements containing the column indices of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse CSR matrix \\f$B\\f$. 
Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_B           number of non-zero entries of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_row_ptr_B   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_col_ind_B   array of \\p nnz_B elements containing the column indices of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  descr_C         descriptor of the sparse CSR matrix \\f$C\\f$. Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_row_ptr_C   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$C\\f$.\n  @param[out]\n  nnz_C           pointer to the number of non-zero entries of the sparse CSR\n                  matrix \\f$C\\f$. \\p nnz_C can be a host or device pointer.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p m, \\p n, \\p nnz_A or \\p nnz_B is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p descr_A, \\p csr_row_ptr_A,\n          \\p csr_col_ind_A, \\p descr_B, \\p csr_row_ptr_B, \\p csr_col_ind_B,\n          \\p descr_C, \\p csr_row_ptr_C or \\p nnz_C is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
+    pub fn rocsparse_csrgeam_nnz(
+        handle: rocsparse_handle,
+        m: rocsparse_int, // rows of A, B and C
+        n: rocsparse_int, // columns of A, B and C
+        descr_A: rocsparse_mat_descr,
+        nnz_A: rocsparse_int,
+        csr_row_ptr_A: *const rocsparse_int, // [in] m+1 row offsets of A
+        csr_col_ind_A: *const rocsparse_int, // [in] nnz_A column indices of A
+        descr_B: rocsparse_mat_descr,
+        nnz_B: rocsparse_int,
+        csr_row_ptr_B: *const rocsparse_int, // [in] m+1 row offsets of B
+        csr_col_ind_B: *const rocsparse_int, // [in] nnz_B column indices of B
+        descr_C: rocsparse_mat_descr,
+        csr_row_ptr_C: *mut rocsparse_int, // [out] m+1 row offsets of C, caller-allocated
+        nnz_C: *mut rocsparse_int, // [out] non-zero count of C; host or device pointer
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix addition using CSR storage format\n\n  \\details\n  \\p rocsparse_csrgeam multiplies the scalar \\f$\\alpha\\f$ with the sparse\n  \\f$m \\times n\\f$ matrix \\f$A\\f$, defined in CSR storage format, multiplies the\n  scalar \\f$\\beta\\f$ with the sparse \\f$m \\times n\\f$ matrix \\f$B\\f$, defined in CSR\n  storage format, and adds both resulting matrices to obtain the sparse\n  \\f$m \\times n\\f$ matrix \\f$C\\f$, defined in CSR storage format, such that\n  \\f[\n    C := \\alpha \\cdot A + \\beta \\cdot B.\n  \\f]\n\n  It is assumed that \\p csr_row_ptr_C has already been filled and that \\p csr_val_C and\n  \\p csr_col_ind_C are allocated by the user. \\p csr_row_ptr_C and allocation size of\n  \\p csr_col_ind_C and \\p csr_val_C is defined by the number of non-zero elements of\n  the sparse CSR matrix C. Both can be obtained by rocsparse_csrgeam_nnz().\n\n  \\note Both scalars \\f$\\alpha\\f$ and \\f$beta\\f$ have to be valid.\n\n  \\note Currently, only \\ref rocsparse_matrix_type_general is supported.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSR matrix \\f$A\\f$, \\f$B\\f$ and \\f$C\\f$.\n  @param[in]\n  n               number of columns of the sparse CSR matrix \\f$A\\f$, \\f$B\\f$ and \\f$C\\f$.\n  @param[in]\n  alpha           scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr_A         descriptor of the sparse CSR matrix \\f$A\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_A           number of non-zero entries of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_val_A       array of \\p nnz_A elements of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_row_ptr_A   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_col_ind_A   array of \\p nnz_A elements containing the column indices of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  beta            scalar \\f$\\beta\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse CSR matrix \\f$B\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_B           number of non-zero entries of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_val_B       array of \\p nnz_B elements of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_row_ptr_B   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_col_ind_B   array of \\p nnz_B elements containing the column indices of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  descr_C         descriptor of the sparse CSR matrix \\f$C\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val_C       array of elements of the sparse CSR matrix \\f$C\\f$.\n  @param[in]\n  csr_row_ptr_C   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$C\\f$.\n  @param[out]\n  csr_col_ind_C   array of elements containing the column indices of the\n                  sparse CSR matrix \\f$C\\f$.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p m, \\p n, \\p nnz_A or \\p nnz_B is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p alpha, \\p descr_A, \\p csr_val_A,\n          \\p csr_row_ptr_A, \\p csr_col_ind_A, \\p beta, \\p descr_B, \\p csr_val_B,\n          \\p csr_row_ptr_B, \\p csr_col_ind_B, \\p descr_C, \\p csr_val_C,\n          \\p csr_row_ptr_C or \\p csr_col_ind_C is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example adds two CSR matrices.\n  \\code{.c}\n  // Initialize scalar multipliers\n  float alpha = 1.0f;\n  float beta  = 1.0f;\n\n  // Create matrix descriptors\n  rocsparse_mat_descr descr_A;\n  rocsparse_mat_descr descr_B;\n  rocsparse_mat_descr descr_C;\n\n  rocsparse_create_mat_descr(&descr_A);\n  rocsparse_create_mat_descr(&descr_B);\n  rocsparse_create_mat_descr(&descr_C);\n\n  // Set pointer mode\n  rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host);\n\n  // Obtain number of total non-zero entries in C and row pointers of C\n  rocsparse_int nnz_C;\n  hipMalloc((void**)&csr_row_ptr_C, sizeof(rocsparse_int) * (m + 1));\n\n  rocsparse_csrgeam_nnz(handle,\n                        m,\n                        n,\n                        descr_A,\n                        nnz_A,\n                        
csr_row_ptr_A,\n                        csr_col_ind_A,\n                        descr_B,\n                        nnz_B,\n                        csr_row_ptr_B,\n                        csr_col_ind_B,\n                        descr_C,\n                        csr_row_ptr_C,\n                        &nnz_C);\n\n  // Compute column indices and values of C\n  hipMalloc((void**)&csr_col_ind_C, sizeof(rocsparse_int) * nnz_C);\n  hipMalloc((void**)&csr_val_C, sizeof(float) * nnz_C);\n\n  rocsparse_scsrgeam(handle,\n                     m,\n                     n,\n                     &alpha,\n                     descr_A,\n                     nnz_A,\n                     csr_val_A,\n                     csr_row_ptr_A,\n                     csr_col_ind_A,\n                     &beta,\n                     descr_B,\n                     nnz_B,\n                     csr_val_B,\n                     csr_row_ptr_B,\n                     csr_col_ind_B,\n                     descr_C,\n                     csr_val_C,\n                     csr_row_ptr_C,\n                     csr_col_ind_C);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsrgeam(
+        handle: rocsparse_handle,
+        m: rocsparse_int, // rows of A, B and C
+        n: rocsparse_int, // columns of A, B and C
+        alpha: *const f32, // [in] scalar applied to A
+        descr_A: rocsparse_mat_descr,
+        nnz_A: rocsparse_int,
+        csr_val_A: *const f32, // [in] nnz_A values of A
+        csr_row_ptr_A: *const rocsparse_int, // [in] m+1 row offsets of A
+        csr_col_ind_A: *const rocsparse_int, // [in] nnz_A column indices of A
+        beta: *const f32, // [in] scalar applied to B
+        descr_B: rocsparse_mat_descr,
+        nnz_B: rocsparse_int,
+        csr_val_B: *const f32, // [in] nnz_B values of B
+        csr_row_ptr_B: *const rocsparse_int, // [in] m+1 row offsets of B
+        csr_col_ind_B: *const rocsparse_int, // [in] nnz_B column indices of B
+        descr_C: rocsparse_mat_descr,
+        csr_val_C: *mut f32, // [out] values of C, caller-allocated
+        csr_row_ptr_C: *const rocsparse_int, // [in] already filled, e.g. by rocsparse_csrgeam_nnz
+        csr_col_ind_C: *mut rocsparse_int, // [out] column indices of C, caller-allocated
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // f64 variant of rocsparse_scsrgeam; the csrgeam docs live on that item
+    pub fn rocsparse_dcsrgeam(
+        handle: rocsparse_handle,
+        m: rocsparse_int, // rows of A, B and C
+        n: rocsparse_int, // columns of A, B and C
+        alpha: *const f64, // [in] scalar applied to A
+        descr_A: rocsparse_mat_descr,
+        nnz_A: rocsparse_int,
+        csr_val_A: *const f64, // [in] nnz_A values of A
+        csr_row_ptr_A: *const rocsparse_int, // [in] m+1 row offsets of A
+        csr_col_ind_A: *const rocsparse_int, // [in] nnz_A column indices of A
+        beta: *const f64, // [in] scalar applied to B
+        descr_B: rocsparse_mat_descr,
+        nnz_B: rocsparse_int,
+        csr_val_B: *const f64, // [in] nnz_B values of B
+        csr_row_ptr_B: *const rocsparse_int, // [in] m+1 row offsets of B
+        csr_col_ind_B: *const rocsparse_int, // [in] nnz_B column indices of B
+        descr_C: rocsparse_mat_descr,
+        csr_val_C: *mut f64, // [out] values of C, caller-allocated
+        csr_row_ptr_C: *const rocsparse_int, // [in] already filled, e.g. by rocsparse_csrgeam_nnz
+        csr_col_ind_C: *mut rocsparse_int, // [out] column indices of C, caller-allocated
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // single-precision complex variant of rocsparse_scsrgeam; docs live on that item
+    pub fn rocsparse_ccsrgeam(
+        handle: rocsparse_handle,
+        m: rocsparse_int, // rows of A, B and C
+        n: rocsparse_int, // columns of A, B and C
+        alpha: *const rocsparse_float_complex, // [in] scalar applied to A
+        descr_A: rocsparse_mat_descr,
+        nnz_A: rocsparse_int,
+        csr_val_A: *const rocsparse_float_complex, // [in] nnz_A values of A
+        csr_row_ptr_A: *const rocsparse_int, // [in] m+1 row offsets of A
+        csr_col_ind_A: *const rocsparse_int, // [in] nnz_A column indices of A
+        beta: *const rocsparse_float_complex, // [in] scalar applied to B
+        descr_B: rocsparse_mat_descr,
+        nnz_B: rocsparse_int,
+        csr_val_B: *const rocsparse_float_complex, // [in] nnz_B values of B
+        csr_row_ptr_B: *const rocsparse_int, // [in] m+1 row offsets of B
+        csr_col_ind_B: *const rocsparse_int, // [in] nnz_B column indices of B
+        descr_C: rocsparse_mat_descr,
+        csr_val_C: *mut rocsparse_float_complex, // [out] values of C, caller-allocated
+        csr_row_ptr_C: *const rocsparse_int, // [in] already filled, e.g. by rocsparse_csrgeam_nnz
+        csr_col_ind_C: *mut rocsparse_int, // [out] column indices of C, caller-allocated
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // double-precision complex variant of rocsparse_scsrgeam; docs live on that item
+    pub fn rocsparse_zcsrgeam(
+        handle: rocsparse_handle,
+        m: rocsparse_int, // rows of A, B and C
+        n: rocsparse_int, // columns of A, B and C
+        alpha: *const rocsparse_double_complex, // [in] scalar applied to A
+        descr_A: rocsparse_mat_descr,
+        nnz_A: rocsparse_int,
+        csr_val_A: *const rocsparse_double_complex, // [in] nnz_A values of A
+        csr_row_ptr_A: *const rocsparse_int, // [in] m+1 row offsets of A
+        csr_col_ind_A: *const rocsparse_int, // [in] nnz_A column indices of A
+        beta: *const rocsparse_double_complex, // [in] scalar applied to B
+        descr_B: rocsparse_mat_descr,
+        nnz_B: rocsparse_int,
+        csr_val_B: *const rocsparse_double_complex, // [in] nnz_B values of B
+        csr_row_ptr_B: *const rocsparse_int, // [in] m+1 row offsets of B
+        csr_col_ind_B: *const rocsparse_int, // [in] nnz_B column indices of B
+        descr_C: rocsparse_mat_descr,
+        csr_val_C: *mut rocsparse_double_complex, // [out] values of C, caller-allocated
+        csr_row_ptr_C: *const rocsparse_int, // [in] already filled, e.g. by rocsparse_csrgeam_nnz
+        csr_col_ind_C: *mut rocsparse_int, // [out] column indices of C, caller-allocated
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrgemm_buffer_size returns the size of the temporary storage buffer\n  that is required by rocsparse_csrgemm_nnz(), rocsparse_scsrgemm(),\n  rocsparse_dcsrgemm(), rocsparse_ccsrgemm() and rocsparse_zcsrgemm(). The temporary\n  storage buffer must be allocated by the user.\n\n  \\note\n  Please note, that for matrix products with more than 4096 non-zero entries per row,\n  additional temporary storage buffer is allocated by the algorithm.\n  \\note\n  Please note, that for matrix products with more than 8192 intermediate products per\n  row, additional temporary storage buffer is allocated by the algorithm.\n  \\note\n  Currently, only \\p trans_A == \\p trans_B == \\ref rocsparse_operation_none is\n  supported.\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  trans_A         matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B         matrix \\f$B\\f$ operation type.\n  @param[in]\n  m               number of rows of the sparse CSR matrix \\f$op(A)\\f$ and \\f$C\\f$.\n  @param[in]\n  n               number of columns of the sparse CSR matrix \\f$op(B)\\f$ and\n                  \\f$C\\f$.\n  @param[in]\n  k               number of columns of the sparse CSR matrix \\f$op(A)\\f$ and number of\n                  rows of the sparse CSR matrix \\f$op(B)\\f$.\n  @param[in]\n  alpha           scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr_A         descriptor of the sparse CSR matrix \\f$A\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_A           number of non-zero entries of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_row_ptr_A   array of \\p m+1 elements (\\f$op(A) == A\\f$, \\p k+1 otherwise)\n                  that point to the start of every row of the sparse CSR matrix\n                  \\f$op(A)\\f$.\n  @param[in]\n  csr_col_ind_A   array of \\p nnz_A elements containing the column indices of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse CSR matrix \\f$B\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_B           number of non-zero entries of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_row_ptr_B   array of \\p k+1 elements (\\f$op(B) == B\\f$, \\p m+1 otherwise)\n                  that point to the start of every row of the sparse CSR matrix\n                  \\f$op(B)\\f$.\n  @param[in]\n  csr_col_ind_B   array of \\p nnz_B elements containing the column indices of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  beta            scalar \\f$\\beta\\f$.\n  @param[in]\n  descr_D         descriptor of the sparse CSR matrix \\f$D\\f$. 
Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_D           number of non-zero entries of the sparse CSR matrix \\f$D\\f$.\n  @param[in]\n  csr_row_ptr_D   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$D\\f$.\n  @param[in]\n  csr_col_ind_D   array of \\p nnz_D elements containing the column indices of the sparse\n                  CSR matrix \\f$D\\f$.\n  @param[inout]\n  info_C          structure that holds meta data for the sparse CSR matrix \\f$C\\f$.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_csrgemm_nnz(), rocsparse_scsrgemm(), rocsparse_dcsrgemm(),\n                  rocsparse_ccsrgemm() and rocsparse_zcsrgemm().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p m, \\p n, \\p k, \\p nnz_A, \\p nnz_B or\n          \\p nnz_D is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p descr_A, \\p csr_row_ptr_A, \\p csr_col_ind_A, \\p descr_B,\n          \\p csr_row_ptr_B or \\p csr_col_ind_B are invalid if \\p alpha is valid,\n          \\p descr_D, \\p csr_row_ptr_D or \\p csr_col_ind_D is invalid if \\p beta is\n          valid, \\p info_C or \\p buffer_size is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p trans_A != \\ref rocsparse_operation_none,\n          \\p trans_B != \\ref rocsparse_operation_none, or\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
     pub fn rocsparse_scsrgemm_buffer_size(
         handle: rocsparse_handle,
         trans_A: rocsparse_operation,
@@ -5952,7 +4904,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrgemm_nnz computes the total CSR non-zero elements and the CSR row\n  offsets, that point to the start of every row of the sparse CSR matrix, of the\n  resulting multiplied matrix C. It is assumed that \\p csr_row_ptr_C has been allocated\n  with size \\p m+1.\n  The required buffer size can be obtained by rocsparse_scsrgemm_buffer_size(),\n  rocsparse_dcsrgemm_buffer_size(), rocsparse_ccsrgemm_buffer_size() and\n  rocsparse_zcsrgemm_buffer_size(), respectively.\n\n  \\note\n  Please note, that for matrix products with more than 8192 intermediate products per\n  row, additional temporary storage buffer is allocated by the algorithm.\n  \\note\n  This function supports unsorted CSR matrices as input, while output will be sorted.\n  Please note that matrices B and D can only be unsorted up to 8192 intermediate\n  products per row. If this number is exceeded, \\ref rocsparse_status_requires_sorted_storage\n  will be returned.\n  \\note\n  This function is blocking with respect to the host.\n  \\note\n  Currently, only \\p trans_A == \\p trans_B == \\ref rocsparse_operation_none is\n  supported.\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  trans_A         matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B         matrix \\f$B\\f$ operation type.\n  @param[in]\n  m               number of rows of the sparse CSR matrix \\f$op(A)\\f$ and \\f$C\\f$.\n  @param[in]\n  n               number of columns of the sparse CSR matrix \\f$op(B)\\f$ and\n                  \\f$C\\f$.\n  @param[in]\n  k               number of columns of the sparse CSR matrix \\f$op(A)\\f$ and number of\n                  rows of the sparse CSR matrix \\f$op(B)\\f$.\n  @param[in]\n  descr_A         descriptor of the 
sparse CSR matrix \\f$A\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_A           number of non-zero entries of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_row_ptr_A   array of \\p m+1 elements (\\f$op(A) == A\\f$, \\p k+1 otherwise)\n                  that point to the start of every row of the sparse CSR matrix\n                  \\f$op(A)\\f$.\n  @param[in]\n  csr_col_ind_A   array of \\p nnz_A elements containing the column indices of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse CSR matrix \\f$B\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_B           number of non-zero entries of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_row_ptr_B   array of \\p k+1 elements (\\f$op(B) == B\\f$, \\p m+1 otherwise)\n                  that point to the start of every row of the sparse CSR matrix\n                  \\f$op(B)\\f$.\n  @param[in]\n  csr_col_ind_B   array of \\p nnz_B elements containing the column indices of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  descr_D         descriptor of the sparse CSR matrix \\f$D\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_D           number of non-zero entries of the sparse CSR matrix \\f$D\\f$.\n  @param[in]\n  csr_row_ptr_D   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$D\\f$.\n  @param[in]\n  csr_col_ind_D   array of \\p nnz_D elements containing the column indices of the sparse\n                  CSR matrix \\f$D\\f$.\n  @param[in]\n  descr_C         descriptor of the sparse CSR matrix \\f$C\\f$. 
Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_row_ptr_C   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$C\\f$.\n  @param[out]\n  nnz_C           pointer to the number of non-zero entries of the sparse CSR\n                  matrix \\f$C\\f$.\n  @param[in]\n  info_C          structure that holds meta data for the sparse CSR matrix \\f$C\\f$.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned\n                  by rocsparse_scsrgemm_buffer_size(),\n                  rocsparse_dcsrgemm_buffer_size(), rocsparse_ccsrgemm_buffer_size() or\n                  rocsparse_zcsrgemm_buffer_size().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p m, \\p n, \\p k, \\p nnz_A, \\p nnz_B or\n          \\p nnz_D is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p descr_A, \\p csr_row_ptr_A,\n          \\p csr_col_ind_A, \\p descr_B, \\p csr_row_ptr_B, \\p csr_col_ind_B,\n          \\p descr_D, \\p csr_row_ptr_D, \\p csr_col_ind_D, \\p descr_C,\n          \\p csr_row_ptr_C, \\p nnz_C, \\p info_C or \\p temp_buffer is invalid.\n  \\retval rocsparse_status_memory_error additional buffer for long rows could not be\n          allocated.\n  \\retval rocsparse_status_not_implemented\n          \\p trans_A != \\ref rocsparse_operation_none,\n          \\p trans_B != \\ref rocsparse_operation_none, or\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
+    #[doc = " \\ingroup extra_module\n  \\brief Sparse matrix sparse matrix multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrgemm_nnz computes the total CSR non-zero elements and the CSR row\n  offsets, that point to the start of every row of the sparse CSR matrix, of the\n  resulting multiplied matrix C. It is assumed that \\p csr_row_ptr_C has been allocated\n  with size \\p m+1.\n  The required buffer size can be obtained by rocsparse_scsrgemm_buffer_size(),\n  rocsparse_dcsrgemm_buffer_size(), rocsparse_ccsrgemm_buffer_size() and\n  rocsparse_zcsrgemm_buffer_size(), respectively.\n\n  \\note\n  Please note, that for matrix products with more than 8192 intermediate products per\n  row, additional temporary storage buffer is allocated by the algorithm.\n  \\note\n  This function supports unsorted CSR matrices as input, while output will be sorted.\n  Please note that matrices B and D can only be unsorted up to 8192 intermediate\n  products per row. If this number is exceeded, \\ref rocsparse_status_requires_sorted_storage\n  will be returned.\n  \\note\n  This function is blocking with respect to the host.\n  \\note\n  Currently, only \\p trans_A == \\p trans_B == \\ref rocsparse_operation_none is\n  supported.\n  \\note\n  Currently, only \\ref rocsparse_matrix_type_general is supported.\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  trans_A         matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B         matrix \\f$B\\f$ operation type.\n  @param[in]\n  m               number of rows of the sparse CSR matrix \\f$op(A)\\f$ and \\f$C\\f$.\n  @param[in]\n  n               number of columns of the sparse CSR matrix \\f$op(B)\\f$ and\n                  \\f$C\\f$.\n  @param[in]\n  k               number of columns of the sparse CSR matrix \\f$op(A)\\f$ and number of\n                  rows of the sparse 
CSR matrix \\f$op(B)\\f$.\n  @param[in]\n  descr_A         descriptor of the sparse CSR matrix \\f$A\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_A           number of non-zero entries of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_row_ptr_A   array of \\p m+1 elements (\\f$op(A) == A\\f$, \\p k+1 otherwise)\n                  that point to the start of every row of the sparse CSR matrix\n                  \\f$op(A)\\f$.\n  @param[in]\n  csr_col_ind_A   array of \\p nnz_A elements containing the column indices of the\n                  sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  descr_B         descriptor of the sparse CSR matrix \\f$B\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_B           number of non-zero entries of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_row_ptr_B   array of \\p k+1 elements (\\f$op(B) == B\\f$, \\p m+1 otherwise)\n                  that point to the start of every row of the sparse CSR matrix\n                  \\f$op(B)\\f$.\n  @param[in]\n  csr_col_ind_B   array of \\p nnz_B elements containing the column indices of the\n                  sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  descr_D         descriptor of the sparse CSR matrix \\f$D\\f$. Currenty, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  nnz_D           number of non-zero entries of the sparse CSR matrix \\f$D\\f$.\n  @param[in]\n  csr_row_ptr_D   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$D\\f$.\n  @param[in]\n  csr_col_ind_D   array of \\p nnz_D elements containing the column indices of the sparse\n                  CSR matrix \\f$D\\f$.\n  @param[in]\n  descr_C         descriptor of the sparse CSR matrix \\f$C\\f$. 
Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_row_ptr_C   array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix \\f$C\\f$.\n  @param[out]\n  nnz_C           pointer to the number of non-zero entries of the sparse CSR\n                  matrix \\f$C\\f$.\n  @param[in]\n  info_C          structure that holds meta data for the sparse CSR matrix \\f$C\\f$.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned\n                  by rocsparse_scsrgemm_buffer_size(),\n                  rocsparse_dcsrgemm_buffer_size(), rocsparse_ccsrgemm_buffer_size() or\n                  rocsparse_zcsrgemm_buffer_size().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p m, \\p n, \\p k, \\p nnz_A, \\p nnz_B or\n          \\p nnz_D is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p descr_A, \\p csr_row_ptr_A,\n          \\p csr_col_ind_A, \\p descr_B, \\p csr_row_ptr_B, \\p csr_col_ind_B,\n          \\p descr_D, \\p csr_row_ptr_D, \\p csr_col_ind_D, \\p descr_C,\n          \\p csr_row_ptr_C, \\p nnz_C, \\p info_C or \\p temp_buffer is invalid.\n  \\retval rocsparse_status_memory_error additional buffer for long rows could not be\n          allocated.\n  \\retval rocsparse_status_not_implemented\n          \\p trans_A != \\ref rocsparse_operation_none,\n          \\p trans_B != \\ref rocsparse_operation_none, or\n          \\p rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
     pub fn rocsparse_csrgemm_nnz(
         handle: rocsparse_handle,
         trans_A: rocsparse_operation,
@@ -6287,6 +5239,3238 @@ extern "C" {
         temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Scale a sparse vector and add it to a scaled dense vector.\n\n  \\details\n  \\ref rocsparse_axpby multiplies the sparse vector \\f$x\\f$ with scalar \\f$\\alpha\\f$ and\n  adds the result to the dense vector \\f$y\\f$ that is multiplied with scalar\n  \\f$\\beta\\f$, such that\n\n  \\f[\n      y := \\alpha \\cdot x + \\beta \\cdot y\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          y[x_ind[i]] = alpha * x_val[i] + beta * y[x_ind[i]]\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  x           sparse matrix descriptor.\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           dense matrix descriptor.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_pointer \\p alpha, \\p x, \\p beta or \\p y pointer is\n          invalid.\n\n  \\par Example\n  \\code{.c}\n   // Number of non-zeros of the sparse vector\n   int nnz = 3;\n\n   // Size of sparse and dense vector\n   int size = 9;\n\n   // Sparse index vector\n   std::vector<int> hx_ind = {0, 3, 5};\n\n   // Sparse value vector\n   std::vector<float> hx_val = {1.0f, 2.0f, 3.0f};\n\n   // Dense vector\n   std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};\n\n   // Scalar alpha\n   float alpha = 3.7f;\n\n   // Scalar beta\n   float beta = 1.2f;\n\n   // Offload data to device\n   int* dx_ind;\n   float* dx_val;\n   float* dy;\n   hipMalloc((void**)&dx_ind, sizeof(int) * nnz);\n   
hipMalloc((void**)&dx_val, sizeof(float) * nnz);\n   hipMalloc((void**)&dy, sizeof(float) * size);\n\n   hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dx_val, hx_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spvec_descr vecX;\n   rocsparse_dnvec_descr vecY;\n\n   rocsparse_indextype idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_index_base idx_base = rocsparse_index_base_zero;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse vector X\n   rocsparse_create_spvec_descr(&vecX,\n                                size,\n                                nnz,\n                                dx_ind,\n                                dx_val,\n                                idx_type,\n                                idx_base,\n                                data_type);\n\n   // Create dense vector Y\n   rocsparse_create_dnvec_descr(&vecY,\n                                size,\n                                dy,\n                                data_type);\n\n   // Call axpby to perform y = beta * y + alpha * x\n   rocsparse_axpby(handle,\n                   &alpha,\n                   vecX,\n                   &beta,\n                   vecY);\n\n   rocsparse_dnvec_get_values(vecY, (void**)&dy);\n\n   // Copy result back to host\n   hipMemcpy(hy.data(), dy, sizeof(float) * size, hipMemcpyDeviceToHost);\n\n   std::cout << \"y\" << std::endl;\n   for(size_t i = 0; i < hy.size(); ++i)\n   {\n       std::cout << hy[i] << \" \";\n   }\n   std::cout << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_spvec_descr(vecX);\n   rocsparse_destroy_dnvec_descr(vecY);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dx_ind);\n   hipFree(dx_val);\n   hipFree(dy);\n  \\endcode"]
+    pub fn rocsparse_axpby(
+        handle: rocsparse_handle,
+        alpha: *const ::std::os::raw::c_void,
+        x: rocsparse_const_spvec_descr,
+        beta: *const ::std::os::raw::c_void,
+        y: rocsparse_dnvec_descr,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_spmat checks if the input matrix is valid.\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the checking operation, when stage is equal to\n  \\ref rocsparse_check_spmat_stage_buffer_size.\n\n  \\note\n  The sparse matrix formats currently supported are: rocsparse_format_coo, rocsparse_format_csr,\n  rocsparse_format_csc and rocsparse_format_ell.\n\n  \\note check_spmat requires two stages to complete. The first stage\n  \\ref rocsparse_check_spmat_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls to \\ref rocsparse_check_spmat.\n  In the final stage \\ref rocsparse_check_spmat_stage_compute, the actual computation is performed.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  mat         matrix descriptor.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  stage       check_matrix stage for the matrix computation.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer. buffer_size is set when\n              \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user. 
When a nullptr is passed,\n              the required allocation size (in bytes) is written to \\p buffer_size and\n              function returns without performing the checking operation.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p mat, \\p buffer_size, \\p temp_buffer or \\p data_status pointer\n              is invalid.\n  \\retval     rocsparse_status_invalid_value the value of stage is incorrect."]
+    pub fn rocsparse_check_spmat(
+        handle: rocsparse_handle,
+        mat: rocsparse_const_spmat_descr,
+        data_status: *mut rocsparse_data_status,
+        stage: rocsparse_check_spmat_stage,
+        buffer_size: *mut usize,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Dense matrix to sparse matrix conversion\n\n  \\details\n  \\p rocsparse_dense_to_sparse\n  \\p rocsparse_dense_to_sparse performs the conversion of a dense matrix to a sparse matrix in CSR, CSC, or COO format.\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the dense to sparse operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  mat_A        dense matrix descriptor.\n  @param[in]\n  mat_B        sparse matrix descriptor.\n  @param[in]\n  alg          algorithm for the sparse to dense computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the dense to sparse operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p mat_A, \\p mat_B, or \\p buffer_size\n               pointer is invalid.\n\n  \\par Example\n  \\code{.c}\n   //     1 4 0 0 0 0\n   // A = 0 2 3 0 0 0\n   //     5 0 0 7 8 0\n   //     0 0 9 0 6 0\n   rocsparse_int m   = 4;\n   rocsparse_int n   = 6;\n\n   std::vector<float> hdense = {1, 0, 5, 0, 4, 2, 0, 0, 0, 3, 0, 9, 0, 0, 7, 0, 0, 0, 8, 6, 0, 0, 0, 0};\n\n   // Offload data to device\n   int* dcsr_row_ptr;\n   float* ddense;\n   hipMalloc((void**)&dcsr_row_ptr, sizeof(int) * (m + 1));\n   hipMalloc((void**)&ddense, sizeof(float) * m * n);\n\n   hipMemcpy(ddense, hdense.data(), sizeof(float) * m * n, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_dnmat_descr matA;\n   rocsparse_spmat_descr matB;\n\n   rocsparse_indextype row_idx_type = rocsparse_indextype_i32;\n   rocsparse_indextype col_idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_index_base idx_base = rocsparse_index_base_zero;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse matrix A\n   rocsparse_create_dnmat_descr(&matA, m, n, m, ddense, data_type, rocsparse_order_column);\n\n   // Create dense matrix B\n   rocsparse_create_csr_descr(&matB,\n                              m,\n                              n,\n                              0,\n                              dcsr_row_ptr,\n                              nullptr,\n                              nullptr,\n                              row_idx_type,\n                              col_idx_type,\n                              
idx_base,\n                              data_type);\n\n   // Call dense_to_sparse to get required buffer size\n   size_t buffer_size = 0;\n   rocsparse_dense_to_sparse(handle,\n                             matA,\n                             matB,\n                             rocsparse_dense_to_sparse_alg_default,\n                             &buffer_size,\n                             nullptr);\n\n   void* temp_buffer;\n   hipMalloc((void**)&temp_buffer, buffer_size);\n\n   // Call dense_to_sparse to perform analysis\n   rocsparse_dense_to_sparse(handle,\n                             matA,\n                             matB,\n                             rocsparse_dense_to_sparse_alg_default,\n                             nullptr,\n                             temp_buffer);\n\n   int64_t num_rows_tmp, num_cols_tmp, nnz;\n   rocsparse_spmat_get_size(matB, &num_rows_tmp, &num_cols_tmp, &nnz);\n\n   int* dcsr_col_ind;\n   float* dcsr_val;\n   hipMalloc((void**)&dcsr_col_ind, sizeof(int) * nnz);\n   hipMalloc((void**)&dcsr_val, sizeof(float) * nnz);\n\n   rocsparse_csr_set_pointers(matB, dcsr_row_ptr, dcsr_col_ind, dcsr_val);\n\n   // Call dense_to_sparse to complete conversion\n   rocsparse_dense_to_sparse(handle,\n                             matA,\n                             matB,\n                             rocsparse_dense_to_sparse_alg_default,\n                             &buffer_size,\n                             temp_buffer);\n\n   std::vector<int> hcsr_row_ptr(m + 1, 0);\n   std::vector<int> hcsr_col_ind(nnz, 0);\n   std::vector<float> hcsr_val(nnz, 0);\n\n   // Copy result back to host\n   hipMemcpy(hcsr_row_ptr.data(), dcsr_row_ptr, sizeof(int) * (m + 1), hipMemcpyDeviceToHost);\n   hipMemcpy(hcsr_col_ind.data(), dcsr_col_ind, sizeof(int) * nnz, hipMemcpyDeviceToHost);\n   hipMemcpy(hcsr_val.data(), dcsr_val, sizeof(int) * nnz, hipMemcpyDeviceToHost);\n\n   std::cout << \"hcsr_row_ptr\" << std::endl;\n   for(size_t i = 0; i < hcsr_row_ptr.size(); 
++i)\n   {\n       std::cout << hcsr_row_ptr[i] << \" \";\n   }\n   std::cout << std::endl;\n\n   std::cout << \"hcsr_col_ind\" << std::endl;\n   for(size_t i = 0; i < hcsr_col_ind.size(); ++i)\n   {\n       std::cout << hcsr_col_ind[i] << \" \";\n   }\n   std::cout << std::endl;\n\n   std::cout << \"hcsr_val\" << std::endl;\n   for(size_t i = 0; i < hcsr_val.size(); ++i)\n   {\n       std::cout << hcsr_val[i] << \" \";\n   }\n   std::cout << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_dnmat_descr(matA);\n   rocsparse_destroy_spmat_descr(matB);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dcsr_row_ptr);\n   hipFree(dcsr_col_ind);\n   hipFree(dcsr_val);\n   hipFree(ddense);\n  \\endcode"]
+    pub fn rocsparse_dense_to_sparse(
+        handle: rocsparse_handle,
+        mat_A: rocsparse_const_dnmat_descr,
+        mat_B: rocsparse_spmat_descr,
+        alg: rocsparse_dense_to_sparse_alg,
+        buffer_size: *mut usize,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Gather elements from a dense vector and store them into a sparse vector.\n\n  \\details\n  \\ref rocsparse_gather gathers the elements from the dense vector \\f$y\\f$ and stores\n  them in the sparse vector \\f$x\\f$.\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          x_val[i] = y[x_ind[i]];\n      }\n  \\endcode\n\n  \\par Uniform Precisions:\n  <table>\n  <caption id=\"gather_uniform\">Uniform Precisions</caption>\n  <tr><th>X / Y\n  <tr><td>rocsparse_datatype_i8_r\n  <tr><td>rocsparse_datatype_f32_r\n  <tr><td>rocsparse_datatype_f64_r\n  <tr><td>rocsparse_datatype_f32_c\n  <tr><td>rocsparse_datatype_f64_c\n  </table>\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  y            dense vector \\f$y\\f$.\n  @param[out]\n  x            sparse vector \\f$x\\f$.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p x or \\p y pointer is invalid.\n\n  \\par Example\n  \\code{.c}\n   // Number of non-zeros of the sparse vector\n   int nnz = 3;\n\n   // Size of sparse and dense vector\n   int size = 9;\n\n   // Sparse index vector\n   std::vector<int> hx_ind = {0, 3, 5};\n\n   // Dense vector\n   std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};\n\n   // Offload data to device\n   int* dx_ind;\n   float* dx_val;\n   float* dy;\n   hipMalloc((void**)&dx_ind, sizeof(int) * nnz);\n   hipMalloc((void**)&dx_val, sizeof(float) * nnz);\n   hipMalloc((void**)&dy, sizeof(float) * size);\n\n   hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, 
hipMemcpyHostToDevice);\n   hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spvec_descr vecX;\n   rocsparse_dnvec_descr vecY;\n\n   rocsparse_indextype idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_index_base idx_base = rocsparse_index_base_zero;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse vector X\n   rocsparse_create_spvec_descr(&vecX,\n                                size,\n                                nnz,\n                                dx_ind,\n                                dx_val,\n                                idx_type,\n                                idx_base,\n                                data_type);\n\n   // Create dense vector Y\n   rocsparse_create_dnvec_descr(&vecY,\n                                size,\n                                dy,\n                                data_type);\n\n   // Call gather\n   rocsparse_gather(handle, vecY, vecX);\n\n   rocsparse_spvec_get_values(vecX, (void**)&dx_val);\n\n   // Copy result back to host\n   std::vector<float> hx_val(nnz, 0.0f);\n   hipMemcpy(hx_val.data(), dx_val, sizeof(float) * nnz, hipMemcpyDeviceToHost);\n\n   std::cout << \"x\" << std::endl;\n   for(size_t i = 0; i < hx_val.size(); ++i)\n   {\n       std::cout << hx_val[i] << \" \";\n   }\n\n   std::cout << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_spvec_descr(vecX);\n   rocsparse_destroy_dnvec_descr(vecY);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dx_ind);\n   hipFree(dx_val);\n   hipFree(dy);\n  \\endcode"]
+    pub fn rocsparse_gather(
+        handle: rocsparse_handle,
+        y: rocsparse_const_dnvec_descr,
+        x: rocsparse_spvec_descr,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Apply Givens rotation to a dense and a sparse vector.\n\n  \\details\n  \\ref rocsparse_rot applies the Givens rotation matrix \\f$G\\f$ to the sparse vector\n  \\f$x\\f$ and the dense vector \\f$y\\f$, where\n  \\f[\n    G = \\begin{pmatrix} c & s \\\\ -s & c \\end{pmatrix}\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          x_tmp = x_val[i];\n          y_tmp = y[x_ind[i]];\n\n          x_val[i]    = c * x_tmp + s * y_tmp;\n          y[x_ind[i]] = c * y_tmp - s * x_tmp;\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  c           pointer to the cosine element of \\f$G\\f$, can be on host or device.\n  @param[in]\n  s           pointer to the sine element of \\f$G\\f$, can be on host or device.\n  @param[inout]\n  x           sparse vector \\f$x\\f$.\n  @param[inout]\n  y           dense vector \\f$y\\f$.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p c, \\p s, \\p x or \\p y pointer is\n              invalid.\n  \\par Example\n  \\code{.c}\n   // Number of non-zeros of the sparse vector\n   int nnz = 3;\n\n   // Size of sparse and dense vector\n   int size = 9;\n\n   // Sparse index vector\n   std::vector<int> hx_ind = {0, 3, 5};\n\n   // Sparse value vector\n   std::vector<float> hx_val = {1.0f, 2.0f, 3.0f};\n\n   // Dense vector\n   std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};\n\n   // Scalar c\n   float c = 3.7f;\n\n   // Scalar s\n   float s = 1.2f;\n\n   // Offload data to device\n   
int* dx_ind;\n   float* dx_val;\n   float* dy;\n   hipMalloc((void**)&dx_ind, sizeof(int) * nnz);\n   hipMalloc((void**)&dx_val, sizeof(float) * nnz);\n   hipMalloc((void**)&dy, sizeof(float) * size);\n\n   hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dx_val, hx_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spvec_descr vecX;\n   rocsparse_dnvec_descr vecY;\n\n   rocsparse_indextype idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_index_base idx_base = rocsparse_index_base_zero;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse vector X\n   rocsparse_create_spvec_descr(&vecX,\n                                size,\n                                nnz,\n                                dx_ind,\n                                dx_val,\n                                idx_type,\n                                idx_base,\n                                data_type);\n\n   // Create dense vector Y\n   rocsparse_create_dnvec_descr(&vecY,\n                                size,\n                                dy,\n                                data_type);\n\n   // Call rot\n   rocsparse_rot(handle, (void*)&c, (void*)&s, vecX, vecY);\n\n   rocsparse_spvec_get_values(vecX, (void**)&dx_val);\n   rocsparse_dnvec_get_values(vecY, (void**)&dy);\n\n   // Copy result back to host\n   hipMemcpy(hx_val.data(), dx_val, sizeof(float) * nnz, hipMemcpyDeviceToHost);\n   hipMemcpy(hy.data(), dy, sizeof(float) * size, hipMemcpyDeviceToHost);\n\n   std::cout << \"x\" << std::endl;\n   for(size_t i = 0; i < hx_val.size(); ++i)\n   {\n       std::cout << hx_val[i] << \" \";\n   }\n\n   std::cout << std::endl;\n\n   std::cout << \"y\" << std::endl;\n   for(size_t i = 0; i < hy.size(); ++i)\n   {\n       std::cout << hy[i] << \" \";\n   }\n\n 
  std::cout << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_spvec_descr(vecX);\n   rocsparse_destroy_dnvec_descr(vecY);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dx_ind);\n   hipFree(dx_val);\n   hipFree(dy);\n  \\endcode"]
+    pub fn rocsparse_rot(
+        handle: rocsparse_handle,
+        c: *const ::std::os::raw::c_void,
+        s: *const ::std::os::raw::c_void,
+        x: rocsparse_spvec_descr,
+        y: rocsparse_dnvec_descr,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Scatter elements from a sparse vector into a dense vector.\n\n  \\details\n  \\ref rocsparse_scatter scatters the elements from the sparse vector \\f$x\\f$ in the dense\n  vector \\f$y\\f$.\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          y[x_ind[i]] = x_val[i];\n      }\n  \\endcode\n\n  \\par Uniform Precisions:\n  <table>\n  <caption id=\"scatter_uniform\">Uniform Precisions</caption>\n  <tr><th>X / Y\n  <tr><td>rocsparse_datatype_i8_r\n  <tr><td>rocsparse_datatype_f32_r\n  <tr><td>rocsparse_datatype_f64_r\n  <tr><td>rocsparse_datatype_f32_c\n  <tr><td>rocsparse_datatype_f64_c\n  </table>\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  x            sparse vector \\f$x\\f$.\n  @param[out]\n  y            dense vector \\f$y\\f$.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p x or \\p y pointer is invalid.\n\n  \\par Example\n  \\code{.c}\n   // Number of non-zeros of the sparse vector\n   int nnz = 3;\n\n   // Size of sparse and dense vector\n   int size = 9;\n\n   // Sparse index vector\n   std::vector<int> hx_ind = {0, 3, 5};\n\n   // Sparse value vector\n   std::vector<float> hx_val = {1.0f, 2.0f, 3.0f};\n\n   // Dense vector\n   std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};\n\n   // Offload data to device\n   int* dx_ind;\n   float* dx_val;\n   float* dy;\n   hipMalloc((void**)&dx_ind, sizeof(int) * nnz);\n   hipMalloc((void**)&dx_val, sizeof(float) * nnz);\n   hipMalloc((void**)&dy, sizeof(float) * size);\n\n 
  hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dx_val, hx_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spvec_descr vecX;\n   rocsparse_dnvec_descr vecY;\n\n   rocsparse_indextype idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_index_base idx_base = rocsparse_index_base_zero;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse vector X\n   rocsparse_create_spvec_descr(&vecX,\n                                size,\n                                nnz,\n                                dx_ind,\n                                dx_val,\n                                idx_type,\n                                idx_base,\n                                data_type);\n\n   // Create dense vector Y\n   rocsparse_create_dnvec_descr(&vecY,\n                                size,\n                                dy,\n                                data_type);\n\n   // Call scatter\n   rocsparse_scatter(handle, vecX, vecY);\n\n   rocsparse_dnvec_get_values(vecY, (void**)&dy);\n\n   // Copy result back to host\n   hipMemcpy(hy.data(), dy, sizeof(float) * size, hipMemcpyDeviceToHost);\n\n   std::cout << \"y\" << std::endl;\n   for(size_t i = 0; i < hy.size(); ++i)\n   {\n       std::cout << hy[i] << \" \";\n   }\n\n   std::cout << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_spvec_descr(vecX);\n   rocsparse_destroy_dnvec_descr(vecY);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dx_ind);\n   hipFree(dx_val);\n   hipFree(dy);\n  \\endcode"]
+    pub fn rocsparse_scatter(
+        handle: rocsparse_handle,
+        x: rocsparse_const_spvec_descr,
+        y: rocsparse_dnvec_descr,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Calculate the size in bytes of the required buffer for the use of \\ref rocsparse_sddmm and \\ref rocsparse_sddmm_preprocess\n\n  \\details\n  \\ref rocsparse_sddmm_buffer_size returns the size of the required buffer to execute the SDDMM operation from a given configuration.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  opA      dense matrix \\f$A\\f$ operation type.\n  @param[in]\n  opB      dense matrix \\f$B\\f$ operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  A            dense matrix \\f$A\\f$ descriptor.\n  @param[in]\n  B            dense matrix \\f$B\\f$ descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[inout]\n  C            sparse matrix \\f$C\\f$ descriptor.\n  @param[in]\n  compute_type floating point precision for the SDDMM computation.\n  @param[in]\n  alg specification of the algorithm to use.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_value the value of \\p opA or \\p opB is incorrect.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p A, \\p B, \\p C or \\p buffer_size pointer is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p opA == \\ref rocsparse_operation_conjugate_transpose or\n          \\p opB == \\ref rocsparse_operation_conjugate_transpose."]
+    pub fn rocsparse_sddmm_buffer_size(
+        handle: rocsparse_handle,
+        opA: rocsparse_operation,
+        opB: rocsparse_operation,
+        alpha: *const ::std::os::raw::c_void,
+        A: rocsparse_const_dnmat_descr,
+        B: rocsparse_const_dnmat_descr,
+        beta: *const ::std::os::raw::c_void,
+        C: rocsparse_spmat_descr,
+        compute_type: rocsparse_datatype,
+        alg: rocsparse_sddmm_alg,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Preprocess data before the use of \\ref rocsparse_sddmm.\n\n  \\details\n  \\ref rocsparse_sddmm_preprocess executes a part of the algorithm that can be calculated once in the context of multiple\n  calls of the \\ref rocsparse_sddmm with the same sparsity pattern.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  opA      dense matrix \\f$A\\f$ operation type.\n  @param[in]\n  opB      dense matrix \\f$B\\f$ operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  A            dense matrix \\f$A\\f$ descriptor.\n  @param[in]\n  B            dense matrix \\f$B\\f$ descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[inout]\n  C            sparse matrix \\f$C\\f$ descriptor.\n  @param[in]\n  compute_type floating point precision for the SDDMM computation.\n  @param[in]\n  alg specification of the algorithm to use.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user.\n  The size must be greater or equal to the size obtained with \\ref rocsparse_sddmm_buffer_size.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_value the value of \\p opA or \\p opB is incorrect.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p A, \\p B, \\p C or \\p temp_buffer pointer is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p opA == \\ref rocsparse_operation_conjugate_transpose or\n          \\p opB == \\ref rocsparse_operation_conjugate_transpose."]
+    pub fn rocsparse_sddmm_preprocess(
+        handle: rocsparse_handle,
+        opA: rocsparse_operation,
+        opB: rocsparse_operation,
+        alpha: *const ::std::os::raw::c_void,
+        A: rocsparse_const_dnmat_descr,
+        B: rocsparse_const_dnmat_descr,
+        beta: *const ::std::os::raw::c_void,
+        C: rocsparse_spmat_descr,
+        compute_type: rocsparse_datatype,
+        alg: rocsparse_sddmm_alg,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief  Sampled Dense-Dense Matrix Multiplication.\n\n  \\details\n  \\ref rocsparse_sddmm multiplies the scalar \\f$\\alpha\\f$ with the dense\n  \\f$m \\times k\\f$ matrix \\f$A\\f$, the dense \\f$k \\times n\\f$ matrix \\f$B\\f$, filtered by the sparsity pattern of the \\f$m \\times n\\f$ sparse matrix \\f$C\\f$ and\n  adds the result to \\f$C\\f$ scaled by\n  \\f$\\beta\\f$. The final result is stored in the sparse \\f$m \\times n\\f$ matrix \\f$C\\f$,\n  such that\n  \\f[\n    C := \\alpha ( op(A) \\cdot op(B) ) \\cdot spy(C) + \\beta C,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if op(A) == rocsparse_operation_none} \\\\\n        A^T,   & \\text{if op(A) == rocsparse_operation_transpose} \\\\\n    \\end{array}\n    \\right.\n  \\f],\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if op(B) == rocsparse_operation_none} \\\\\n        B^T,   & \\text{if op(B) == rocsparse_operation_transpose} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n   and\n  \\f[\n    spy(C)_{ij} = \\left\\{\n    \\begin{array}{ll}\n        1,   & \\text{if } C_{ij} \\neq 0 \\\\\n        0,   & \\text{otherwise} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note \\p opA == \\ref rocsparse_operation_conjugate_transpose is not supported.\n  \\note \\p opB == \\ref rocsparse_operation_conjugate_transpose is not supported.\n  \\note\n  This routine supports execution in a hipGraph context only when \\p alg == \\ref rocsparse_sddmm_alg_default.\n\n  \\note\n  Different algorithms are available which can provide better performance for different matrices.\n  Currently, the available algorithms are rocsparse_sddmm_alg_default or rocsparse_sddmm_alg_dense.\n  The algorithm rocsparse_sddmm_alg_default uses the sparsity pattern of matrix C to perform a limited set of dot products.\n  On the other hand, rocsparse_sddmm_alg_dense explicitly converts the matrix C into a dense matrix to perform a dense 
matrix multiply and add.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  opA      dense matrix \\f$A\\f$ operation type.\n  @param[in]\n  opB      dense matrix \\f$B\\f$ operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  A            dense matrix \\f$A\\f$ descriptor.\n  @param[in]\n  B            dense matrix \\f$B\\f$ descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[inout]\n  C            sparse matrix \\f$C\\f$ descriptor.\n  @param[in]\n  compute_type floating point precision for the SDDMM computation.\n  @param[in]\n  alg specification of the algorithm to use.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user.\n  The size must be greater or equal to the size obtained with \\ref rocsparse_sddmm_buffer_size.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_value the value of \\p trans\\_A, \\p trans\\_B, \\p compute\\_type or alg is incorrect.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p A, \\p B, \\p D, \\p C or \\p temp_buffer pointer is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p opA == \\ref rocsparse_operation_conjugate_transpose or\n          \\p opB == \\ref rocsparse_operation_conjugate_transpose."]
+    pub fn rocsparse_sddmm(
+        handle: rocsparse_handle,
+        opA: rocsparse_operation,
+        opB: rocsparse_operation,
+        alpha: *const ::std::os::raw::c_void,
+        A: rocsparse_const_dnmat_descr,
+        B: rocsparse_const_dnmat_descr,
+        beta: *const ::std::os::raw::c_void,
+        C: rocsparse_spmat_descr,
+        compute_type: rocsparse_datatype,
+        alg: rocsparse_sddmm_alg,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix to dense matrix conversion\n\n  \\details\n  \\p rocsparse_sparse_to_dense\n  \\p rocsparse_sparse_to_dense performs the conversion of a sparse matrix in CSR, CSC, or COO format to\n     a dense matrix\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the sparse to dense operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  mat_A        sparse matrix descriptor.\n  @param[in]\n  mat_B        dense matrix descriptor.\n  @param[in]\n  alg          algorithm for the sparse to dense computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the sparse to dense operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p mat_A, \\p mat_B, or \\p buffer_size\n               pointer is invalid.\n\n  \\par Example\n  \\code{.c}\n   //     1 4 0 0 0 0\n   // A = 0 2 3 0 0 0\n   //     5 0 0 7 8 0\n   //     0 0 9 0 6 0\n   rocsparse_int m   = 4;\n   rocsparse_int n   = 6;\n\n   std::vector<int> hcsr_row_ptr = {0, 2, 4, 7, 9};\n   std::vector<int> hcsr_col_ind = {0, 1, 1, 2, 0, 3, 4, 2, 4};\n   std::vector<float> hcsr_val   = {1, 4, 2, 3, 5, 7, 8, 9, 6};\n   std::vector<float> hdense(m * n, 0.0f);\n\n   rocsparse_int nnz = hcsr_row_ptr[m] - hcsr_row_ptr[0];\n\n   // Offload data to device\n   int* dcsr_row_ptr;\n   int* dcsr_col_ind;\n   float* dcsr_val;\n   float* ddense;\n   hipMalloc((void**)&dcsr_row_ptr, sizeof(int) * (m + 1));\n   hipMalloc((void**)&dcsr_col_ind, sizeof(int) * nnz);\n   hipMalloc((void**)&dcsr_val, sizeof(float) * nnz);\n   hipMalloc((void**)&ddense, sizeof(float) * m * n);\n\n   hipMemcpy(dcsr_row_ptr, hcsr_row_ptr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_col_ind, hcsr_col_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_val, hcsr_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(ddense, hdense.data(), sizeof(float) * m * n, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spmat_descr matA;\n   rocsparse_dnmat_descr matB;\n\n   rocsparse_indextype row_idx_type = rocsparse_indextype_i32;\n   rocsparse_indextype col_idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_index_base idx_base = 
rocsparse_index_base_zero;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse matrix A\n   rocsparse_create_csr_descr(&matA,\n                              m,\n                              n,\n                              nnz,\n                              dcsr_row_ptr,\n                              dcsr_col_ind,\n                              dcsr_val,\n                              row_idx_type,\n                              col_idx_type,\n                              idx_base,\n                              data_type);\n\n   // Create dense matrix B\n   rocsparse_create_dnmat_descr(&matB, m, n, m, ddense, data_type, rocsparse_order_column);\n\n   // Call sparse_to_dense\n   size_t buffer_size = 0;\n   rocsparse_sparse_to_dense(handle,\n                             matA,\n                             matB,\n                             rocsparse_sparse_to_dense_alg_default,\n                             &buffer_size,\n                             nullptr);\n\n   void* temp_buffer;\n   hipMalloc((void**)&temp_buffer, buffer_size);\n\n   rocsparse_sparse_to_dense(handle,\n                             matA,\n                             matB,\n                             rocsparse_sparse_to_dense_alg_default,\n                             &buffer_size,\n                             temp_buffer);\n\n   // Copy result back to host\n   hipMemcpy(hdense.data(), ddense, sizeof(float) * m * n, hipMemcpyDeviceToHost);\n\n   std::cout << \"hdense\" << std::endl;\n   for(size_t i = 0; i < hdense.size(); ++i)\n   {\n       std::cout << hdense[i] << \" \";\n   }\n   std::cout << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_spmat_descr(matA);\n   rocsparse_destroy_dnmat_descr(matB);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dcsr_row_ptr);\n   hipFree(dcsr_col_ind);\n   hipFree(dcsr_val);\n   hipFree(ddense);\n  \\endcode"]
+    pub fn rocsparse_sparse_to_dense(
+        handle: rocsparse_handle, // in: rocsparse library context handle
+        mat_A: rocsparse_const_spmat_descr, // in: sparse matrix descriptor being converted
+        mat_B: rocsparse_dnmat_descr, // dense matrix descriptor (non-const; the conversion result per the doc above)
+        alg: rocsparse_sparse_to_dense_alg, // in: algorithm selector
+        buffer_size: *mut usize, // out: required size in bytes, written when `temp_buffer` is null
+        temp_buffer: *mut ::std::os::raw::c_void, // in: user-allocated scratch buffer, or null to query the size only
+    ) -> rocsparse_status;
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct _rocsparse_sparse_to_sparse_descr {
+    _unused: [u8; 0], // zero-length placeholder: no fields are exposed to Rust; used only behind the `*mut` alias that follows
+}
+#[doc = " \\ingroup generic_module\n \\brief rocsparse_sparse_to_sparse_descr is a structure holding the rocsparse sparse_to_sparse\n descr data. It must be initialized using\n the rocsparse_create_sparse_to_sparse_descr() routine. It should be destroyed at the\n end using rocsparse_destroy_sparse_to_sparse_descr()."]
+pub type rocsparse_sparse_to_sparse_descr = *mut _rocsparse_sparse_to_sparse_descr; // raw pointer handle to the placeholder struct above
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix to sparse matrix conversion.\n\n  \\details\n  \\p rocsparse_create_sparse_to_sparse_descr\n  \\p rocsparse_create_sparse_to_sparse_descr creates the descriptor of the sparse_to_sparse algorithm.\n\n  @param[out]\n  descr        pointer to the descriptor of the sparse_to_sparse algorithm.\n  @param[in]\n  source       source sparse matrix descriptor.\n  @param[in]\n  target       target sparse matrix descriptor.\n  @param[in]\n  alg          algorithm for the sparse_to_sparse computation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_value if any required enumeration is invalid.\n  \\retval      rocsparse_status_invalid_pointer \\p descr, \\p source, or \\p target\n               pointer is invalid."]
+    pub fn rocsparse_create_sparse_to_sparse_descr(
+        descr: *mut rocsparse_sparse_to_sparse_descr, // out: location that receives the created descriptor
+        source: rocsparse_const_spmat_descr, // in: source sparse matrix descriptor (const)
+        target: rocsparse_spmat_descr, // in: target sparse matrix descriptor
+        alg: rocsparse_sparse_to_sparse_alg, // in: conversion algorithm selector
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix to sparse matrix conversion.\n\n  \\details\n  \\p rocsparse_sparse_to_sparse_permissive\n  \\p rocsparse_sparse_to_sparse_permissive allows the routine to allocate an intermediate sparse matrix\n  in order to perform the conversion. By default, the routine is not permissive.\n  @param[in]\n  descr        descriptor of the sparse_to_sparse algorithm.\n  \\retval      rocsparse_status_success the operation completed successfully."]
+    pub fn rocsparse_sparse_to_sparse_permissive(
+        descr: rocsparse_sparse_to_sparse_descr, // in: descriptor to switch into permissive mode
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix to sparse matrix conversion.\n\n  \\details\n  \\p rocsparse_destroy_sparse_to_sparse_descr\n  \\p rocsparse_destroy_sparse_to_sparse_descr destroys the descriptor of the sparse_to_sparse algorithm.\n\n  @param[in]\n  descr        descriptor of the sparse_to_sparse algorithm.\n  \\retval      rocsparse_status_success the operation completed successfully."]
+    pub fn rocsparse_destroy_sparse_to_sparse_descr(
+        descr: rocsparse_sparse_to_sparse_descr, // in: descriptor to destroy
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix to sparse matrix conversion.\n\n  \\details\n  \\p rocsparse_sparse_to_sparse_buffer_size\n  \\p rocsparse_sparse_to_sparse_buffer_size calculates the required buffer size in bytes for a given stage \\p stage.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  descr        descriptor of the sparse_to_sparse algorithm.\n  @param[in]\n  source       source sparse matrix descriptor.\n  @param[in]\n  target       target sparse matrix descriptor.\n  @param[in]\n  stage        stage of the sparse_to_sparse computation.\n  @param[out]\n  buffer_size_in_bytes  size in bytes of the \\p buffer\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_value if any required enumeration is invalid.\n  \\retval      rocsparse_status_invalid_pointer \\p source, \\p target, or \\p buffer_size_in_bytes\n               pointer is invalid."]
+    pub fn rocsparse_sparse_to_sparse_buffer_size(
+        handle: rocsparse_handle,
+        descr: rocsparse_sparse_to_sparse_descr,
+        source: rocsparse_const_spmat_descr,
+        target: rocsparse_spmat_descr,
+        stage: rocsparse_sparse_to_sparse_stage,
+        buffer_size_in_bytes: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix to sparse matrix conversion.\n\n  \\details\n  \\p rocsparse_sparse_to_sparse\n  \\p rocsparse_sparse_to_sparse performs the conversion of a sparse matrix to a sparse matrix.\n\n  \\note\n  The required allocation size (in bytes) to \\p buffer_size_in_bytes must be obtained from \\ref rocsparse_sparse_to_sparse_buffer_size\n  for each stage, indeed the required buffer size can be different between stages.\n\n  \\note\n  The format \\ref rocsparse_format_bell is not supported.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  descr        descriptor of the sparse_to_sparse algorithm.\n  @param[in]\n  source       sparse matrix descriptor.\n  @param[in]\n  target       sparse matrix descriptor.\n  @param[in]\n  stage        stage of the sparse_to_sparse computation.\n  @param[in]\n  buffer_size_in_bytes  size in bytes of the \\p buffer\n  @param[in]\n  buffer  temporary storage buffer allocated by the user.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\par Example\n  This example converts a CSR matrix into an ELL matrix.\n  \\code{.c}\n\n      // It assumes the CSR arrays (ptr, ind, val) have already been allocated and filled.\n      // Build Source\n      rocsparse_spmat_descr source;\n      rocsparse_create_csr_descr(&source, M, N, nnz, ptr, ind, val, rocsparse_indextype_i32, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_datatype_f32_r);\n\n      // Build target\n      void * ell_ind, * ell_val;\n      int64_t ell_width = 0;\n      rocsparse_spmat_descr target;\n      rocsparse_create_ell_descr(&target, M, N, ell_ind, ell_val, ell_width, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_datatype_f32_r);\n\n      // Create descriptor\n      rocsparse_sparse_to_sparse_descr descr;\n      rocsparse_create_sparse_to_sparse_descr(&descr, source, target,  
rocsparse_sparse_to_sparse_alg_default);\n\n      // Analysis phase\n      rocsparse_sparse_to_sparse_buffer_size(handle, descr, source, target, rocsparse_sparse_to_sparse_stage_analysis, &buffer_size);\n      hipMalloc(&buffer,buffer_size);\n      rocsparse_sparse_to_sparse(handle, descr, source, target, rocsparse_sparse_to_sparse_stage_analysis, buffer_size, buffer);\n      hipFree(buffer);\n\n      //\n      // the user is responsible to allocate target arrays after the analysis phase.\n      //\n      { int64_t rows, cols, ell_width;\n        void * ind, * val;\n        rocsparse_indextype        idx_type;\n        rocsparse_index_base       idx_base;\n        rocsparse_datatype         data_type;\n\n         rocsparse_ell_get(target,\n                           &rows,\n                           &cols,\n                           &ind,\n                           &val,\n                           &ell_width,\n                           &idx_type,\n                           &idx_base,\n                           &data_type);\n         hipMalloc(&ell_ind,ell_width * M * sizeof(int32_t));\n         hipMalloc(&ell_val,ell_width * M * sizeof(float));\n         rocsparse_ell_set_pointers(target, ell_ind, ell_val); }\n\n      // Calculation phase\n      rocsparse_sparse_to_sparse_buffer_size(handle, descr, source, target, rocsparse_sparse_to_sparse_stage_compute, &buffer_size);\n      hipMalloc(&buffer,buffer_size);\n      rocsparse_sparse_to_sparse(handle, descr, source, target, rocsparse_sparse_to_sparse_stage_compute, buffer_size, buffer);\n      hipFree(buffer);\n  \\endcode"]
+    pub fn rocsparse_sparse_to_sparse(
+        handle: rocsparse_handle,
+        descr: rocsparse_sparse_to_sparse_descr,
+        source: rocsparse_const_spmat_descr,
+        target: rocsparse_spmat_descr,
+        stage: rocsparse_sparse_to_sparse_stage,
+        buffer_size_in_bytes: usize,
+        buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix sparse matrix multiplication\n\n  \\details\n  \\ref rocsparse_spgemm multiplies the scalar \\f$\\alpha\\f$ with the sparse\n  \\f$m \\times k\\f$ matrix \\f$A\\f$ and the sparse \\f$k \\times n\\f$ matrix \\f$B\\f$ and\n  adds the result to the sparse \\f$m \\times n\\f$ matrix \\f$D\\f$ that is multiplied by\n  \\f$\\beta\\f$. The final result is stored in the sparse \\f$m \\times n\\f$ matrix \\f$C\\f$,\n  such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot D,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note SpGEMM requires three stages to complete. The first stage\n  \\ref rocsparse_spgemm_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls to \\ref rocsparse_spgemm. The second stage\n  \\ref rocsparse_spgemm_stage_nnz will determine the number of non-zero elements of the\n  resulting \\f$C\\f$ matrix. If the sparsity pattern of \\f$C\\f$ is already known, this\n  stage can be skipped. 
In the final stage \\ref rocsparse_spgemm_stage_compute, the actual\n  computation is performed.\n  \\note If \\f$\\alpha == 0\\f$, then \\f$C = \\beta \\cdot D\\f$ will be computed.\n  \\note If \\f$\\beta == 0\\f$, then \\f$C = \\alpha \\cdot op(A) \\cdot op(B)\\f$ will be\n  computed.\n  \\note Currently only CSR and BSR formats are supported.\n  \\note If \\ref rocsparse_spgemm_stage_symbolic is selected then the symbolic computation is performed only.\n  \\note If \\ref rocsparse_spgemm_stage_numeric is selected then the numeric computation is performed only.\n  \\note For the \\ref rocsparse_spgemm_stage_symbolic and \\ref rocsparse_spgemm_stage_numeric stages, only\n  CSR matrix format is currently supported.\n  \\note \\f$\\alpha == \\beta == 0\\f$ is invalid.\n  \\note It is allowed to pass the same sparse matrix for \\f$C\\f$ and \\f$D\\f$, if both\n  matrices have the same sparsity pattern.\n  \\note Currently, only \\p trans_A == \\ref rocsparse_operation_none is supported.\n  \\note Currently, only \\p trans_B == \\ref rocsparse_operation_none is supported.\n  \\note This function is non blocking and executed asynchronously with respect to the\n        host. 
It may return before the actual computation has finished.\n  \\note Please note, that for rare matrix products with more than 4096 non-zero entries\n  per row, additional temporary storage buffer is allocated by the algorithm.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans_A      sparse matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B      sparse matrix \\f$B\\f$ operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  A            sparse matrix \\f$A\\f$ descriptor.\n  @param[in]\n  B            sparse matrix \\f$B\\f$ descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[in]\n  D            sparse matrix \\f$D\\f$ descriptor.\n  @param[out]\n  C            sparse matrix \\f$C\\f$ descriptor.\n  @param[in]\n  compute_type floating point precision for the SpGEMM computation.\n  @param[in]\n  alg          SpGEMM algorithm for the SpGEMM computation.\n  @param[in]\n  stage        SpGEMM stage for the SpGEMM computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpGEMM operation.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p A, \\p B, \\p D, \\p C or \\p buffer_size pointer is invalid.\n  \\retval rocsparse_status_memory_error additional buffer for long rows could not be\n          allocated.\n  \\retval rocsparse_status_not_implemented\n          \\p trans_A != \\ref rocsparse_operation_none or\n          \\p trans_B != \\ref rocsparse_operation_none.\n\n  \\par Example\n  \\code{.c}\n   // A - m x k\n   // B - k x n\n   // C - m x n\n   int m = 400;\n   int n = 400;\n   int k = 300;\n\n   std::vector<int> hcsr_row_ptr_A = {...}; // host A m x k matrix\n   std::vector<int> hcsr_col_ind_A = {...}; // host A m x k matrix\n   std::vector<float> hcsr_val_A = {...};   // host A m x k matrix\n\n   std::vector<int> hcsr_row_ptr_B = {...}; // host B k x n matrix\n   std::vector<int> hcsr_col_ind_B = {...}; // host B k x n matrix\n   std::vector<float> hcsr_val_B = {...};   // host B k x n matrix\n\n   int nnz_A = hcsr_val_A.size();\n   int nnz_B = hcsr_val_B.size();\n\n   float alpha            = 1.0f;\n   float beta             = 0.0f;\n\n   int* dcsr_row_ptr_A = nullptr;\n   int* dcsr_col_ind_A = nullptr;\n   float* dcsr_val_A = nullptr;\n\n   int* dcsr_row_ptr_B = nullptr;\n   int* dcsr_col_ind_B = nullptr;\n   float* dcsr_val_B = nullptr;\n\n   int* dcsr_row_ptr_C = nullptr;\n\n   hipMalloc((void**)&dcsr_row_ptr_A, (m + 1) * sizeof(int));\n   hipMalloc((void**)&dcsr_col_ind_A, nnz_A * sizeof(int));\n   hipMalloc((void**)&dcsr_val_A, nnz_A * sizeof(float));\n\n   hipMalloc((void**)&dcsr_row_ptr_B, (k + 1) * sizeof(int));\n   
hipMalloc((void**)&dcsr_col_ind_B, nnz_B * sizeof(int));\n   hipMalloc((void**)&dcsr_val_B, nnz_B * sizeof(float));\n\n   hipMalloc((void**)&dcsr_row_ptr_C, (m + 1) * sizeof(int));\n\n   hipMemcpy(dcsr_row_ptr_A, hcsr_row_ptr_A.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_col_ind_A, hcsr_col_ind_A.data(), nnz_A * sizeof(int), hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_val_A, hcsr_val_A.data(), nnz_A * sizeof(float), hipMemcpyHostToDevice);\n\n   hipMemcpy(dcsr_row_ptr_B, hcsr_row_ptr_B.data(), (k + 1) * sizeof(int), hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_col_ind_B, hcsr_col_ind_B.data(), nnz_B * sizeof(int), hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_val_B, hcsr_val_B.data(), nnz_B * sizeof(float), hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spmat_descr matA, matB, matC, matD;\n   void*                temp_buffer    = NULL;\n   size_t               buffer_size = 0;\n\n   rocsparse_operation trans_A = rocsparse_operation_none;\n   rocsparse_operation trans_B = rocsparse_operation_none;\n   rocsparse_index_base index_base = rocsparse_index_base_zero;\n   rocsparse_indextype itype = rocsparse_indextype_i32;\n   rocsparse_indextype jtype = rocsparse_indextype_i32;\n   rocsparse_datatype  ttype = rocsparse_datatype_f32_r;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse matrix A in CSR format\n   rocsparse_create_csr_descr(&matA, m, k, nnz_A,\n                       dcsr_row_ptr_A, dcsr_col_ind_A, dcsr_val_A,\n                       itype, jtype,\n                       index_base, ttype);\n\n   // Create sparse matrix B in CSR format\n   rocsparse_create_csr_descr(&matB, k, n, nnz_B,\n                       dcsr_row_ptr_B, dcsr_col_ind_B, dcsr_val_B,\n                       itype, jtype,\n                       index_base, ttype);\n\n   // Create sparse matrix C in CSR format\n   rocsparse_create_csr_descr(&matC, m, n, 0,\n                       dcsr_row_ptr_C, nullptr, nullptr,\n               
        itype, jtype,\n                       index_base, ttype);\n\n   // Create sparse matrix D in CSR format\n   rocsparse_create_csr_descr(&matD, 0, 0, 0,\n                       nullptr, nullptr, nullptr,\n                       itype, jtype,\n                       index_base, ttype);\n\n   // Determine buffer size\n   rocsparse_spgemm(handle,\n                    trans_A,\n                    trans_B,\n                    &alpha,\n                    matA,\n                    matB,\n                    &beta,\n                    matD,\n                    matC,\n                    ttype,\n                    rocsparse_spgemm_alg_default,\n                    rocsparse_spgemm_stage_buffer_size,\n                    &buffer_size,\n                    nullptr);\n\n   hipMalloc(&temp_buffer, buffer_size);\n\n   // Determine number of non-zeros in C matrix\n   rocsparse_spgemm(handle,\n                    trans_A,\n                    trans_B,\n                    &alpha,\n                    matA,\n                    matB,\n                    &beta,\n                    matD,\n                    matC,\n                    ttype,\n                    rocsparse_spgemm_alg_default,\n                    rocsparse_spgemm_stage_nnz,\n                    &buffer_size,\n                    temp_buffer);\n\n   int64_t rows_C;\n   int64_t cols_C;\n   int64_t nnz_C;\n\n   // Extract number of non-zeros in C matrix so we can allocate the column indices and values arrays\n   rocsparse_spmat_get_size(matC, &rows_C, &cols_C, &nnz_C);\n\n   int* dcsr_col_ind_C;\n   float* dcsr_val_C;\n   hipMalloc((void**)&dcsr_col_ind_C, sizeof(int) * nnz_C);\n   hipMalloc((void**)&dcsr_val_C, sizeof(float) * nnz_C);\n\n   // Set C matrix pointers\n   rocsparse_csr_set_pointers(matC, dcsr_row_ptr_C, dcsr_col_ind_C, dcsr_val_C);\n\n   // SpGEMM computation\n   rocsparse_spgemm(handle,\n                    trans_A,\n                    trans_B,\n                    &alpha,\n                    
matA,\n                    matB,\n                    &beta,\n                    matD,\n                    matC,\n                    ttype,\n                    rocsparse_spgemm_alg_default,\n                    rocsparse_spgemm_stage_compute,\n                    &buffer_size,\n                    temp_buffer);\n\n   // Copy C matrix result back to host\n   std::vector<int> hcsr_row_ptr_C(m + 1);\n   std::vector<int> hcsr_col_ind_C(nnz_C);\n   std::vector<float>  hcsr_val_C(nnz_C);\n\n   hipMemcpy(hcsr_row_ptr_C.data(), dcsr_row_ptr_C, sizeof(int) * (m + 1), hipMemcpyDeviceToHost);\n   hipMemcpy(hcsr_col_ind_C.data(), dcsr_col_ind_C, sizeof(int) * nnz_C, hipMemcpyDeviceToHost);\n   hipMemcpy(hcsr_val_C.data(), dcsr_val_C, sizeof(float) * nnz_C, hipMemcpyDeviceToHost);\n\n   // Destroy matrix descriptors\n   rocsparse_destroy_spmat_descr(matA);\n   rocsparse_destroy_spmat_descr(matB);\n   rocsparse_destroy_spmat_descr(matC);\n   rocsparse_destroy_spmat_descr(matD);\n   rocsparse_destroy_handle(handle);\n\n   // Free device arrays\n   hipFree(temp_buffer);\n   hipFree(dcsr_row_ptr_A);\n   hipFree(dcsr_col_ind_A);\n   hipFree(dcsr_val_A);\n\n   hipFree(dcsr_row_ptr_B);\n   hipFree(dcsr_col_ind_B);\n   hipFree(dcsr_val_B);\n\n   hipFree(dcsr_row_ptr_C);\n   hipFree(dcsr_col_ind_C);\n   hipFree(dcsr_val_C);\n  \\endcode"]
+    pub fn rocsparse_spgemm(
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        alpha: *const ::std::os::raw::c_void,
+        A: rocsparse_const_spmat_descr,
+        B: rocsparse_const_spmat_descr,
+        beta: *const ::std::os::raw::c_void,
+        D: rocsparse_const_spmat_descr,
+        C: rocsparse_spmat_descr,
+        compute_type: rocsparse_datatype,
+        alg: rocsparse_spgemm_alg,
+        stage: rocsparse_spgemm_stage,
+        buffer_size: *mut usize,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse iterative triangular solve\n\n  \\details\n  \\p rocsparse_spitsv solves, using the Jacobi iterative method, a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR format, a dense solution vector\n  \\f$y\\f$ and the right-hand side \\f$x\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot y = \\alpha \\cdot x,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note SpITSV requires three stages to complete. The first stage\n  \\ref rocsparse_spitsv_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls. The second stage\n  \\ref rocsparse_spitsv_stage_preprocess will preprocess data that would be saved in the temporary storage buffer.\n  In the final stage \\ref rocsparse_spitsv_stage_compute, the actual computation is performed.\n\n  \\note\n  Currently, only non-mixed numerical precision is supported.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[inout]\n  host_nmaxiter     maximum number of iteration on input and maximum number of iteration on output.\n  @param[in]\n  host_tol          if the pointer is null then loop will execute \\p host_nmaxiter[0] iterations. The precision is float for f32 based calculation (including the complex case) and double for f64 based calculation (including the complex case).\n  @param[out]\n  host_history      Optional array to record the history. 
The precision is float for f32 based calculation (including the complex case) and double for f64 based calculation (including the complex case).\n  @param[in]\n  trans        matrix operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  mat          matrix descriptor.\n  @param[in]\n  x            vector descriptor.\n  @param[inout]\n  y            vector descriptor.\n  @param[in]\n  compute_type floating point precision for the SpITSV computation.\n  @param[in]\n  alg          SpITSV algorithm for the SpITSV computation.\n  @param[in]\n  stage        SpITSV stage for the SpITSV computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpITSV operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p mat, \\p x, \\p y or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_not_implemented \\p trans, \\p compute_type, \\p stage or \\p alg is\n               currently not supported."]
+    pub fn rocsparse_spitsv(
+        handle: rocsparse_handle,
+        host_nmaxiter: *mut rocsparse_int,
+        host_tol: *const ::std::os::raw::c_void,
+        host_history: *mut ::std::os::raw::c_void,
+        trans: rocsparse_operation,
+        alpha: *const ::std::os::raw::c_void,
+        mat: rocsparse_spmat_descr,
+        x: rocsparse_dnvec_descr,
+        y: rocsparse_dnvec_descr,
+        compute_type: rocsparse_datatype,
+        alg: rocsparse_spitsv_alg,
+        stage: rocsparse_spitsv_stage,
+        buffer_size: *mut usize,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix dense matrix multiplication, extension routine.\n\n  \\details\n  \\p rocsparse_spmm multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times k\\f$\n  matrix \\f$A\\f$, defined in CSR or COO or Blocked ELL storage format, and the dense \\f$k \\times n\\f$\n  matrix \\f$B\\f$ and adds the result to the dense \\f$m \\times n\\f$ matrix \\f$C\\f$ that\n  is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot C,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\par Uniform Precisions:\n  <table>\n  <caption id=\"spmm_uniform\">Uniform Precisions</caption>\n  <tr><th>A / B / C / compute_type\n  <tr><td>rocsparse_datatype_f32_r\n  <tr><td>rocsparse_datatype_f64_r\n  <tr><td>rocsparse_datatype_f32_c\n  <tr><td>rocsparse_datatype_f64_c\n  </table>\n\n  \\par Mixed precisions:\n  <table>\n  <caption id=\"spmm_mixed\">Mixed Precisions</caption>\n  <tr><th>A / B                   <th>C                        <th>compute_type\n  <tr><td>rocsparse_datatype_i8_r <td>rocsparse_datatype_i32_r <td>rocsparse_datatype_i32_r\n  <tr><td>rocsparse_datatype_i8_r <td>rocsparse_datatype_f32_r <td>rocsparse_datatype_f32_r\n  </table>\n\n  \\note\n  Mixed precisions only supported for CSR, CSC, and COO matrix formats.\n\n  \\note\n  Only the \\ref 
rocsparse_spmm_stage_buffer_size stage and the \\ref rocsparse_spmm_stage_compute stage are non blocking\n  and executed asynchronously with respect to the host. They may return before the actual computation has finished.\n  The \\ref rocsparse_spmm_stage_preprocess stage is blocking with respect to the host.\n\n  \\note\n  Currently, only \\p trans_A == \\ref rocsparse_operation_none is supported for COO and Blocked ELL formats.\n\n  \\note\n  Only the \\ref rocsparse_spmm_stage_buffer_size stage and the \\ref rocsparse_spmm_stage_compute stage\n  support execution in a hipGraph context. The \\ref rocsparse_spmm_stage_preprocess stage does not support hipGraph.\n\n  \\note\n  Currently, only CSR, COO and Blocked ELL sparse formats are supported.\n\n  \\note\n  Different algorithms are available which can provide better performance for different matrices.\n  Currently, the available algorithms are rocsparse_spmm_alg_csr, rocsparse_spmm_alg_csr_row_split\n  or rocsparse_spmm_alg_csr_merge for CSR matrices, rocsparse_spmm_alg_bell for Blocked ELL matrices and\n  rocsparse_spmm_alg_coo_segmented or rocsparse_spmm_alg_coo_atomic for COO matrices. Additionally,\n  one can specify the algorithm to be rocsparse_spmm_alg_default. In the case of CSR matrices this will\n  set the algorithm to be rocsparse_spmm_alg_csr, in the case of Blocked ELL matrices this will set the\n  algorithm to be rocsparse_spmm_alg_bell and for COO matrices it will set the algorithm to be\n  rocsparse_spmm_alg_coo_atomic. When A is transposed, rocsparse_spmm will revert to using\n  rocsparse_spmm_alg_csr for CSR format and rocsparse_spmm_alg_coo_atomic for COO format regardless\n  of algorithm selected.\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the SpMM operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note SpMM requires three stages to complete. 
The first stage\n  \\ref rocsparse_spmm_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls to \\ref rocsparse_spmm. The second stage\n  \\ref rocsparse_spmm_stage_preprocess will preprocess data that would be saved in the temporary storage buffer.\n  In the final stage \\ref rocsparse_spmm_stage_compute, the actual computation is performed.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans_A      matrix operation type.\n  @param[in]\n  trans_B      matrix operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  mat_A        matrix descriptor.\n  @param[in]\n  mat_B        matrix descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[in]\n  mat_C        matrix descriptor.\n  @param[in]\n  compute_type floating point precision for the SpMM computation.\n  @param[in]\n  alg          SpMM algorithm for the SpMM computation.\n  @param[in]\n  stage        SpMM stage for the SpMM computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpMM operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p mat_A, \\p mat_B, \\p mat_C, \\p beta, or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_not_implemented \\p trans_A, \\p trans_B, \\p compute_type or \\p alg is\n               currently not supported.\n  \\par Example\n  This example performs sparse matrix-dense matrix multiplication, C = alpha * A * B + beta * C\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      //     1 4 2\n      //     1 2 3\n      // B = 5 4 0\n      //     3 1 9\n      //     1 2 2\n      //     0 3 0\n\n      //     1 1 5\n      // C = 1 2 1\n      //     1 3 1\n      //     6 2 4\n\n      rocsparse_int m   = 4;\n      rocsparse_int k   = 6;\n      rocsparse_int n   = 3;\n\n      csr_row_ptr[m + 1] = {0, 2, 4, 7, 9};             // device memory\n      csr_col_ind[nnz]   = {0, 1, 1, 2, 0, 3, 4, 2, 4}; // device memory\n      csr_val[nnz]       = {1, 4, 2, 3, 5, 7, 8, 9, 6}; // device memory\n\n      B[k * n]       = {1, 1, 5, 3, 1, 0, 4, 2, 4, 1, 2, 3, 2, 3, 0, 9, 2, 0};     // device memory\n      C[m * n]       = {1, 1, 1, 6, 1, 2, 3, 2, 5, 1, 1, 4};                       // device memory\n\n      rocsparse_int nnz = csr_row_ptr[m] - csr_row_ptr[0];\n\n      float alpha = 1.0f;\n      float beta  = 0.0f;\n\n      // Create CSR arrays on device\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n      float* B;\n      float* C;\n      
hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnz);\n      hipMalloc((void**)&B, sizeof(float) * k * n);\n      hipMalloc((void**)&C, sizeof(float) * m * n);\n\n      // Create rocsparse handle\n      rocsparse_local_handle handle;\n\n      // Types\n      rocsparse_indextype itype = rocsparse_indextype_i32;\n      rocsparse_indextype jtype = rocsparse_indextype_i32;\n      rocsparse_datatype  ttype = rocsparse_datatype_f32_r;\n\n      // Create descriptors\n      rocsparse_spmat_descr mat_A;\n      rocsparse_dnmat_descr mat_B;\n      rocsparse_dnmat_descr mat_C;\n\n      rocsparse_create_csr_descr(&mat_A, m, k, nnz, csr_row_ptr, csr_col_ind, csr_val, itype, jtype, rocsparse_index_base_zero, ttype);\n      rocsparse_create_dnmat_descr(&mat_B, k, n, k, B, ttype, rocsparse_order_column);\n      rocsparse_create_dnmat_descr(&mat_C, m, n, m, C, ttype, rocsparse_order_column);\n\n      // Query SpMM buffer\n      size_t buffer_size;\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n                     &beta,\n                     mat_C,\n                     ttype,\n                     rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_buffer_size,\n                     &buffer_size,\n                     nullptr);\n\n      // Allocate buffer\n      void* buffer;\n      hipMalloc(&buffer, buffer_size);\n\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n                     &beta,\n                     mat_C,\n                     ttype,\n                     
rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_preprocess,\n                     &buffer_size,\n                     buffer);\n\n      // Pointer mode host\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n                     &beta,\n                     mat_C,\n                     ttype,\n                     rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_compute,\n                     &buffer_size,\n                     buffer);\n\n      // Clear up on device\n      hipFree(csr_row_ptr);\n      hipFree(csr_col_ind);\n      hipFree(csr_val);\n      hipFree(B);\n      hipFree(C);\n      hipFree(buffer);\n\n      rocsparse_destroy_spmat_descr(mat_A);\n      rocsparse_destroy_dnmat_descr(mat_B);\n      rocsparse_destroy_dnmat_descr(mat_C);\n  \\endcode\n\n  \\par Example\n  SpMM also supports batched computation for CSR and COO matrices. There are three supported batch modes:\n      C_i = A * B_i\n      C_i = A_i * B\n      C_i = A_i * B_i\n  The batch mode is determined by the batch count and stride passed for each matrix. 
For example\n  to use the first batch mode (C_i = A * B_i) with 100 batches for non-transposed A, B, and C, one passes:\n      batch_count_A = 1\n      batch_count_B = 100\n      batch_count_C = 100\n      offsets_batch_stride_A        = 0\n      columns_values_batch_stride_A = 0\n      batch_stride_B                = k * n\n      batch_stride_C                = m * n\n  To use the second batch mode (C_i = A_i * B) one could use:\n      batch_count_A = 100\n      batch_count_B = 1\n      batch_count_C = 100\n      offsets_batch_stride_A        = m + 1\n      columns_values_batch_stride_A = nnz\n      batch_stride_B                = 0\n      batch_stride_C                = m * n\n  And to use the third batch mode (C_i = A_i * B_i) one could use:\n      batch_count_A = 100\n      batch_count_B = 100\n      batch_count_C = 100\n      offsets_batch_stride_A        = m + 1\n      columns_values_batch_stride_A = nnz\n      batch_stride_B                = k * n\n      batch_stride_C                = m * n\n  An example of the first batch mode (C_i = A * B_i) is provided below.\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      rocsparse_int m   = 4;\n      rocsparse_int k   = 6;\n      rocsparse_int n   = 3;\n\n      csr_row_ptr[m + 1] = {0, 2, 4, 7, 9};             // device memory\n      csr_col_ind[nnz]   = {0, 1, 1, 2, 0, 3, 4, 2, 4}; // device memory\n      csr_val[nnz]       = {1, 4, 2, 3, 5, 7, 8, 9, 6}; // device memory\n\n      B[batch_count_B * k * n]       = {...};     // device memory\n      C[batch_count_C * m * n]       = {...};     // device memory\n\n      rocsparse_int nnz = csr_row_ptr[m] - csr_row_ptr[0];\n\n      rocsparse_int batch_count_A = 1;\n      rocsparse_int batch_count_B = 100;\n      rocsparse_int batch_count_C = 100;\n\n      rocsparse_int offsets_batch_stride_A        = 0;\n      
rocsparse_int columns_values_batch_stride_A = 0;\n      rocsparse_int batch_stride_B                = k * n;\n      rocsparse_int batch_stride_C                = m * n;\n\n      float alpha = 1.0f;\n      float beta  = 0.0f;\n\n      // Create CSR arrays on device\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n      float* B;\n      float* C;\n      hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnz);\n      hipMalloc((void**)&B, sizeof(float) * batch_count_B * k * n);\n      hipMalloc((void**)&C, sizeof(float) * batch_count_C * m * n);\n\n      // Create rocsparse handle\n      rocsparse_local_handle handle;\n\n      // Types\n      rocsparse_indextype itype = rocsparse_indextype_i32;\n      rocsparse_indextype jtype = rocsparse_indextype_i32;\n      rocsparse_datatype  ttype = rocsparse_datatype_f32_r;\n\n      // Create descriptors\n      rocsparse_spmat_descr mat_A;\n      rocsparse_dnmat_descr mat_B;\n      rocsparse_dnmat_descr mat_C;\n\n      rocsparse_create_csr_descr(&mat_A, m, k, nnz, csr_row_ptr, csr_col_ind, csr_val, itype, jtype, rocsparse_index_base_zero, ttype);\n      rocsparse_create_dnmat_descr(&mat_B, k, n, k, B, ttype, rocsparse_order_column);\n      rocsparse_create_dnmat_descr(&mat_C, m, n, m, C, ttype, rocsparse_order_column);\n\n      rocsparse_csr_set_strided_batch(mat_A, batch_count_A, offsets_batch_stride_A, columns_values_batch_stride_A);\n      rocsparse_dnmat_set_strided_batch(mat_B, batch_count_B, batch_stride_B);\n      rocsparse_dnmat_set_strided_batch(mat_C, batch_count_C, batch_stride_C);\n\n      // Query SpMM buffer\n      size_t buffer_size;\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n 
                    &beta,\n                     mat_C,\n                     ttype,\n                     rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_buffer_size,\n                     &buffer_size,\n                     nullptr);\n\n      // Allocate buffer\n      void* buffer;\n      hipMalloc(&buffer, buffer_size);\n\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n                     &beta,\n                     mat_C,\n                     ttype,\n                     rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_preprocess,\n                     &buffer_size,\n                     buffer);\n\n      // Pointer mode host\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n                     &beta,\n                     mat_C,\n                     ttype,\n                     rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_compute,\n                     &buffer_size,\n                     buffer);\n\n      // Clear up on device\n      hipFree(csr_row_ptr);\n      hipFree(csr_col_ind);\n      hipFree(csr_val);\n      hipFree(B);\n      hipFree(C);\n      hipFree(buffer);\n\n      rocsparse_destroy_spmat_descr(mat_A);\n      rocsparse_destroy_dnmat_descr(mat_B);\n      rocsparse_destroy_dnmat_descr(mat_C);\n  \\endcode"]
+    pub fn rocsparse_spmm(
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        alpha: *const ::std::os::raw::c_void,
+        mat_A: rocsparse_const_spmat_descr,
+        mat_B: rocsparse_const_dnmat_descr,
+        beta: *const ::std::os::raw::c_void,
+        mat_C: rocsparse_dnmat_descr,
+        compute_type: rocsparse_datatype,
+        alg: rocsparse_spmm_alg,
+        stage: rocsparse_spmm_stage,
+        buffer_size: *mut usize,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix vector multiplication\n\n  \\details\n  \\ref rocsparse_spmv multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix and the dense vector \\f$x\\f$ and adds the result to the dense vector \\f$y\\f$\n  that is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\details\n  \\ref rocsparse_spmv supports multiple different algorithms. These algorithms have different trade offs depending on the sparsity\n  pattern of the matrix, whether or not the results need to be deterministic, and how many times the sparse-vector product will\n  be performed.\n\n  <table>\n  <caption id=\"spmv_csr_algorithms\">CSR Algorithms</caption>\n  <tr><th>Algorithm                            <th>Deterministic  <th>Preprocessing  <th>Notes\n  <tr><td>rocsparse_spmv_alg_csr_stream</td>   <td>Yes</td>       <td>No</td>        <td>Is best suited for matrices with all rows having a similar number of non-zeros. Can outperform adaptive and LRB algorithms in certain sparsity patterns. Will perform very poorly if some rows have few non-zeros and some rows have many non-zeros.</td>\n  <tr><td>rocsparse_spmv_alg_csr_adaptive</td> <td>No</td>        <td>Yes</td>       <td>Generally the fastest algorithm across all matrix sparsity patterns. This includes matrices that have some rows with many non-zeros and some rows with few non-zeros. 
Requires a lengthy preprocessing that needs to be amortized over many subsequent sparse vector products.</td>\n  <tr><td>rocsparse_spmv_alg_csr_lrb</td>      <td>No</td>        <td>Yes</td>       <td>Like adaptive algorithm, generally performs well across all matrix sparsity patterns. Generally not as fast as adaptive algorithm, however uses a much faster pre-processing step. Good for when only a small number of sparse vector products will be performed.</td>\n  </table>\n\n  <table>\n  <caption id=\"spmv_coo_algorithms\">COO Algorithms</caption>\n  <tr><th>Algorithm                          <th>Deterministic   <th>Preprocessing <th>Notes\n  <tr><td>rocsparse_spmv_alg_coo</td>        <td>Yes</td>        <td>Yes</td>      <td>Generally not as fast as atomic algorithm but is deterministic</td>\n  <tr><td>rocsparse_spmv_alg_coo_atomic</td> <td>No</td>         <td>No</td>       <td>Generally the fastest COO algorithm</td>\n  </table>\n\n  <table>\n  <caption id=\"spmv_ell_algorithms\">ELL Algorithms</caption>\n  <tr><th>Algorithm                     <th>Deterministic   <th>Preprocessing <th>Notes\n  <tr><td>rocsparse_spmv_alg_ell</td>   <td>Yes</td>        <td>No</td>       <td></td>\n  </table>\n\n  <table>\n  <caption id=\"spmv_bsr_algorithms\">BSR Algorithms</caption>\n  <tr><th>Algorithm                     <th>Deterministic   <th>Preprocessing <th>Notes\n  <tr><td>rocsparse_spmv_alg_bsr</td>   <td>Yes</td>        <td>No</td>       <td></td>\n  </table>\n\n  \\note\n  None of the algorithms above are deterministic when A is transposed.\n\n  \\details\n  \\ref rocsparse_spmv supports multiple combinations of data types and compute types. The tables below indicate the currently\n  supported different data types that can be used for the sparse matrix A and the dense vectors X and Y and the compute\n  type for \\f$\\alpha\\f$ and \\f$\\beta\\f$. 
The advantage of using different data types is to save on memory bandwidth and storage\n  when a user application allows while performing the actual computation in a higher precision.\n\n  \\par Uniform Precisions:\n  <table>\n  <caption id=\"spmv_uniform\">Uniform Precisions</caption>\n  <tr><th>A / X / Y / compute_type\n  <tr><td>rocsparse_datatype_f32_r\n  <tr><td>rocsparse_datatype_f64_r\n  <tr><td>rocsparse_datatype_f32_c\n  <tr><td>rocsparse_datatype_f64_c\n  </table>\n\n  \\par Mixed precisions:\n  <table>\n  <caption id=\"spmv_mixed\">Mixed Precisions</caption>\n  <tr><th>A / X                   <th>Y                        <th>compute_type\n  <tr><td>rocsparse_datatype_i8_r <td>rocsparse_datatype_i32_r <td>rocsparse_datatype_i32_r\n  <tr><td>rocsparse_datatype_i8_r <td>rocsparse_datatype_f32_r <td>rocsparse_datatype_f32_r\n  </table>\n\n  \\par Mixed-regular real precisions\n  <table>\n  <caption id=\"spmv_mixed_regular_real\">Mixed-regular real precisions</caption>\n  <tr><th>A                        <th>X / Y / compute_type\n  <tr><td>rocsparse_datatype_f32_r <td>rocsparse_datatype_f64_r\n  <tr><td>rocsparse_datatype_f32_c <td>rocsparse_datatype_f64_c\n  </table>\n\n  \\par Mixed-regular Complex precisions\n  <table>\n  <caption id=\"spmv_mixed_regular_complex\">Mixed-regular Complex precisions</caption>\n  <tr><th>A                        <th>X / Y / compute_type\n  <tr><td>rocsparse_datatype_f32_r <td>rocsparse_datatype_f32_c\n  <tr><td>rocsparse_datatype_f64_r <td>rocsparse_datatype_f64_c\n  </table>\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the SpMV operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note\n  Only the \\ref rocsparse_spmv_stage_buffer_size stage and the \\ref rocsparse_spmv_stage_compute stage are non blocking\n  and executed asynchronously with respect to the host. 
They may return before the actual computation has finished.\n  The \\ref rocsparse_spmv_stage_preprocess stage is blocking with respect to the host.\n\n  \\note\n  Only the \\ref rocsparse_spmv_stage_buffer_size stage and the \\ref rocsparse_spmv_stage_compute stage\n  support execution in a hipGraph context. The \\ref rocsparse_spmv_stage_preprocess stage does not support hipGraph.\n\n  \\note\n  The sparse matrix formats currently supported are: rocsparse_format_bsr, rocsparse_format_coo,\n  rocsparse_format_coo_aos, rocsparse_format_csr, rocsparse_format_csc and rocsparse_format_ell.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans        matrix operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  mat          matrix descriptor.\n  @param[in]\n  x            vector descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[inout]\n  y            vector descriptor.\n  @param[in]\n  compute_type floating point precision for the SpMV computation.\n  @param[in]\n  alg          SpMV algorithm for the SpMV computation.\n  @param[in]\n  stage        SpMV stage for the SpMV computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpMV operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context \\p handle was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p mat, \\p x, \\p beta, \\p y or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_invalid_value the value of \\p trans, \\p compute_type, \\p alg, or \\p stage is incorrect.\n  \\retval      rocsparse_status_not_implemented \\p compute_type or \\p alg is\n               currently not supported.\n\n  \\par Example\n  \\code{.c}\n   //     1 4 0 0 0 0\n   // A = 0 2 3 0 0 0\n   //     5 0 0 7 8 0\n   //     0 0 9 0 6 0\n   rocsparse_int m   = 4;\n   rocsparse_int n   = 6;\n\n   std::vector<int> hcsr_row_ptr = {0, 2, 4, 7, 9};\n   std::vector<int> hcsr_col_ind = {0, 1, 1, 2, 0, 3, 4, 2, 4};\n   std::vector<float> hcsr_val   = {1, 4, 2, 3, 5, 7, 8, 9, 6};\n   std::vector<float> hx(n, 1.0f);\n   std::vector<float> hy(m, 0.0f);\n\n   // Scalar alpha\n   float alpha = 3.7f;\n\n   // Scalar beta\n   float beta = 0.0f;\n\n   rocsparse_int nnz = hcsr_row_ptr[m] - hcsr_row_ptr[0];\n\n   // Offload data to device\n   int* dcsr_row_ptr;\n   int* dcsr_col_ind;\n   float* dcsr_val;\n   float* dx;\n   float* dy;\n   hipMalloc((void**)&dcsr_row_ptr, sizeof(int) * (m + 1));\n   hipMalloc((void**)&dcsr_col_ind, sizeof(int) * nnz);\n   hipMalloc((void**)&dcsr_val, sizeof(float) * nnz);\n   hipMalloc((void**)&dx, sizeof(float) * n);\n   hipMalloc((void**)&dy, sizeof(float) * m);\n\n   hipMemcpy(dcsr_row_ptr, hcsr_row_ptr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_col_ind, hcsr_col_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_val, hcsr_val.data(), sizeof(float) * 
nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dx, hx.data(), sizeof(float) * n, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spmat_descr matA;\n   rocsparse_dnvec_descr vecX;\n   rocsparse_dnvec_descr vecY;\n\n   rocsparse_indextype row_idx_type = rocsparse_indextype_i32;\n   rocsparse_indextype col_idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_datatype  compute_type = rocsparse_datatype_f32_r;\n   rocsparse_index_base idx_base = rocsparse_index_base_zero;\n   rocsparse_operation trans = rocsparse_operation_none;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse matrix A\n   rocsparse_create_csr_descr(&matA,\n                              m,\n                              n,\n                              nnz,\n                              dcsr_row_ptr,\n                              dcsr_col_ind,\n                              dcsr_val,\n                              row_idx_type,\n                              col_idx_type,\n                              idx_base,\n                              data_type);\n\n   // Create dense vector X\n   rocsparse_create_dnvec_descr(&vecX,\n                                n,\n                                dx,\n                                data_type);\n\n   // Create dense vector Y\n   rocsparse_create_dnvec_descr(&vecY,\n                                m,\n                                dy,\n                                data_type);\n\n   // Call spmv to get buffer size\n   size_t buffer_size;\n   rocsparse_spmv(handle,\n                  trans,\n                  &alpha,\n                  matA,\n                  vecX,\n                  &beta,\n                  vecY,\n                  compute_type,\n                  rocsparse_spmv_alg_csr_adaptive,\n                  rocsparse_spmv_stage_buffer_size,\n                  &buffer_size,\n                  nullptr);\n\n   void* temp_buffer;\n   
hipMalloc((void**)&temp_buffer, buffer_size);\n\n   // Call spmv to perform analysis\n   rocsparse_spmv(handle,\n                  trans,\n                  &alpha,\n                  matA,\n                  vecX,\n                  &beta,\n                  vecY,\n                  compute_type,\n                  rocsparse_spmv_alg_csr_adaptive,\n                  rocsparse_spmv_stage_preprocess,\n                  &buffer_size,\n                  temp_buffer);\n\n   // Call spmv to perform computation\n   rocsparse_spmv(handle,\n                  trans,\n                  &alpha,\n                  matA,\n                  vecX,\n                  &beta,\n                  vecY,\n                  compute_type,\n                  rocsparse_spmv_alg_csr_adaptive,\n                  rocsparse_spmv_stage_compute,\n                  &buffer_size,\n                  temp_buffer);\n\n   // Copy result back to host\n   hipMemcpy(hy.data(), dy, sizeof(float) * m, hipMemcpyDeviceToHost);\n\n   std::cout << \"hy\" << std::endl;\n   for(size_t i = 0; i < hy.size(); ++i)\n   {\n       std::cout << hy[i] << \" \";\n   }\n   std::cout << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_spmat_descr(matA);\n   rocsparse_destroy_dnvec_descr(vecX);\n   rocsparse_destroy_dnvec_descr(vecY);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dcsr_row_ptr);\n   hipFree(dcsr_col_ind);\n   hipFree(dcsr_val);\n   hipFree(dx);\n   hipFree(dy);\n   hipFree(temp_buffer);\n  \\endcode"]
+    pub fn rocsparse_spmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        alpha: *const ::std::os::raw::c_void,
+        mat: rocsparse_const_spmat_descr,
+        x: rocsparse_const_dnvec_descr,
+        beta: *const ::std::os::raw::c_void,
+        y: rocsparse_dnvec_descr,
+        compute_type: rocsparse_datatype,
+        alg: rocsparse_spmv_alg,
+        stage: rocsparse_spmv_stage,
+        buffer_size: *mut usize,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix vector multiplication\n\n  \\details\n  \\ref rocsparse_spmv_ex multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix and the dense vector \\f$x\\f$ and adds the result to the dense vector \\f$y\\f$\n  that is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\details\n  \\ref rocsparse_spmv supports multiple different algorithms. These algorithms have different trade offs depending on the sparsity\n  pattern of the matrix, whether or not the results need to be deterministic, and how many times the sparse-vector product will\n  be performed.\n\n  <table>\n  <caption id=\"spmv_ex_csr_algorithms\">CSR Algorithms</caption>\n  <tr><th>Algorithm                            <th>Deterministic  <th>Preprocessing  <th>Notes\n  <tr><td>rocsparse_spmv_alg_csr_stream</td>   <td>Yes</td>       <td>No</td>        <td>Is best suited for matrices with all rows having a similar number of non-zeros. Can out perform adaptive and LRB algirthms in certain sparsity patterns. Will perform very poorly if some rows have few non-zeros and some rows have many non-zeros.</td>\n  <tr><td>rocsparse_spmv_alg_csr_adaptive</td> <td>No</td>        <td>Yes</td>       <td>Generally the fastest algorithm across all matrix sparsity patterns. This includes matrices that have some rows with many non-zeros and some rows with few non-zeros. 
Requires a lengthy preprocessing that needs to be amortized over many subsequent sparse vector products.</td>\n  <tr><td>rocsparse_spmv_alg_csr_lrb</td>      <td>No</td>        <td>Yes</td>       <td>Like adaptive algorithm, generally performs well accross all matrix sparsity patterns. Generally not as fast as adaptive algorithm, however uses a much faster pre-processing step. Good for when only a few number of sparse vector products will be performed.</td>\n  </table>\n\n  <table>\n  <caption id=\"spmv_ex_coo_algorithms\">COO Algorithms</caption>\n  <tr><th>Algorithm                          <th>Deterministic   <th>Preprocessing <th>Notes\n  <tr><td>rocsparse_spmv_alg_coo</td>        <td>Yes</td>        <td>Yes</td>      <td>Generally not as fast as atomic algorithm but is deterministic</td>\n  <tr><td>rocsparse_spmv_alg_coo_atomic</td> <td>No</td>         <td>No</td>       <td>Generally the fastest COO algorithm</td>\n  </table>\n\n  <table>\n  <caption id=\"spmv_ex_ell_algorithms\">ELL Algorithms</caption>\n  <tr><th>Algorithm                     <th>Deterministic   <th>Preprocessing <th>Notes\n  <tr><td>rocsparse_spmv_alg_ell</td>   <td>Yes</td>        <td>No</td>       <td></td>\n  </table>\n\n  <table>\n  <caption id=\"spmv_ex_bsr_algorithms\">BSR Algorithms</caption>\n  <tr><th>Algorithm                     <th>Deterministic   <th>Preprocessing <th>Notes\n  <tr><td>rocsparse_spmv_alg_bsr</td>   <td>Yes</td>        <td>No</td>       <td></td>\n  </table>\n\n  \\note\n  None of the algorithms above are deterministic when A is transposed.\n\n  \\details\n  \\ref rocsparse_spmv_ex supports multiple combinations of data types and compute types. The tables below indicate the currently\n  supported different data types that can be used for for the sparse matrix A and the dense vectors X and Y and the compute\n  type for \\f$\\alpha\\f$ and \\f$\\beta\\f$. 
The advantage of using different data types is to save on memory bandwidth and storage\n  when a user application allows while performing the actual computation in a higher precision.\n\n  \\par Uniform Precisions:\n  <table>\n  <caption id=\"spmv_uniform_ex\">Uniform Precisions</caption>\n  <tr><th>A / X / Y / compute_type\n  <tr><td>rocsparse_datatype_f32_r\n  <tr><td>rocsparse_datatype_f64_r\n  <tr><td>rocsparse_datatype_f32_c\n  <tr><td>rocsparse_datatype_f64_c\n  </table>\n\n  \\par Mixed precisions:\n  <table>\n  <caption id=\"spmv_mixed_ex\">Mixed Precisions</caption>\n  <tr><th>A / X                   <th>Y                        <th>compute_type\n  <tr><td>rocsparse_datatype_i8_r <td>rocsparse_datatype_i32_r <td>rocsparse_datatype_i32_r\n  <tr><td>rocsparse_datatype_i8_r <td>rocsparse_datatype_f32_r <td>rocsparse_datatype_f32_r\n  </table>\n\n  \\par Mixed-regular real precisions\n  <table>\n  <caption id=\"spmv_mixed_regular_real_ex\">Mixed-regular real precisions</caption>\n  <tr><th>A                        <th>X / Y / compute_type\n  <tr><td>rocsparse_datatype_f32_r <td>rocsparse_datatype_f64_r\n  <tr><td>rocsparse_datatype_f32_c <td>rocsparse_datatype_f64_c\n  </table>\n\n  \\par Mixed-regular Complex precisions\n  <table>\n  <caption id=\"spmv_mixed_regular_complex_ex\">Mixed-regular Complex precisions</caption>\n  <tr><th>A                        <th>X / Y / compute_type\n  <tr><td>rocsparse_datatype_f32_r <td>rocsparse_datatype_f32_c\n  <tr><td>rocsparse_datatype_f64_r <td>rocsparse_datatype_f64_c\n  </table>\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the SpMV operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note\n  The sparse matrix formats currently supported are: rocsparse_format_bsr, rocsparse_format_coo,\n  rocsparse_format_coo_aos, rocsparse_format_csr, rocsparse_format_csc and rocsparse_format_ell.\n\n  \\note SpMV_ex requires three stages 
to complete. The first stage\n  \\ref rocsparse_spmv_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls to \\ref rocsparse_spmv_ex. The second stage\n  \\ref rocsparse_spmv_stage_preprocess will preprocess data that would be saved in the temporary storage buffer.\n  In the final stage \\ref rocsparse_spmv_stage_compute, the actual computation is performed.\n\n  \\note\n  Only the \\ref rocsparse_spmv_stage_buffer_size stage and the \\ref rocsparse_spmv_stage_compute stage are non blocking\n  and executed asynchronously with respect to the host. They may return before the actual computation has finished.\n  The \\ref rocsparse_spmv_stage_preprocess stage is blocking with respect to the host.\n\n  \\note\n  Only the \\ref rocsparse_spmv_stage_buffer_size stage and the \\ref rocsparse_spmv_stage_compute stage\n  support execution in a hipGraph context. The \\ref rocsparse_spmv_stage_preprocess stage does not support hipGraph.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans        matrix operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  mat          matrix descriptor.\n  @param[in]\n  x            vector descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[inout]\n  y            vector descriptor.\n  @param[in]\n  compute_type floating point precision for the SpMV computation.\n  @param[in]\n  alg          SpMV algorithm for the SpMV computation.\n  @param[in]\n  stage        SpMV stage for the SpMV computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpMV operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context \\p handle was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p mat, \\p x, \\p beta, \\p y or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_invalid_value the value of \\p trans, \\p compute_type, \\p alg or \\p stage is incorrect.\n  \\retval      rocsparse_status_not_implemented \\p compute_type or \\p alg is\n               currently not supported."]
+    pub fn rocsparse_spmv_ex(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        alpha: *const ::std::os::raw::c_void,
+        mat: rocsparse_spmat_descr,
+        x: rocsparse_dnvec_descr,
+        beta: *const ::std::os::raw::c_void,
+        y: rocsparse_dnvec_descr,
+        compute_type: rocsparse_datatype,
+        alg: rocsparse_spmv_alg,
+        stage: rocsparse_spmv_stage,
+        buffer_size: *mut usize,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse triangular system solve\n\n  \\details\n  \\p rocsparse_spsm_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR or COO storage format, a dense solution matrix\n  \\f$C\\f$ and the right-hand side \\f$B\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot C = \\alpha \\cdot op(B),\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note SpSM requires three stages to complete. The first stage\n  \\ref rocsparse_spsm_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls. The second stage\n  \\ref rocsparse_spsm_stage_preprocess will preprocess data that would be saved in the temporary storage buffer.\n  In the final stage \\ref rocsparse_spsm_stage_compute, the actual computation is performed.\n\n  \\note\n  Only the \\ref rocsparse_spsm_stage_buffer_size stage and the \\ref rocsparse_spsm_stage_compute stage are non blocking\n  and executed asynchronously with respect to the host. 
They may return before the actual computation has finished.\n  The \\ref rocsparse_spsm_stage_preprocess stage is blocking with respect to the host.\n\n  \\note\n  Currently, only \\p trans_A == \\ref rocsparse_operation_none and \\p trans_A == \\ref rocsparse_operation_transpose is supported.\n  Currently, only \\p trans_B == \\ref rocsparse_operation_none and \\p trans_B == \\ref rocsparse_operation_transpose is supported.\n\n  \\note\n  Only the \\ref rocsparse_spsm_stage_buffer_size stage and the \\ref rocsparse_spsm_stage_compute stage\n  support execution in a hipGraph context. The \\ref rocsparse_spsm_stage_preprocess stage does not support hipGraph.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans_A      matrix operation type for the sparse matrix A.\n  @param[in]\n  trans_B      matrix operation type for the dense matrix B.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  matA          sparse matrix descriptor.\n  @param[in]\n  matB          dense matrix descriptor.\n  @param[inout]\n  matC          dense matrix descriptor.\n  @param[in]\n  compute_type floating point precision for the SpSM computation.\n  @param[in]\n  alg          SpSM algorithm for the SpSM computation.\n  @param[in]\n  stage        SpSM stage for the SpSM computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpSM operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p matA, \\p matB, \\p matC, \\p descr or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_not_implemented \\p trans_A, \\p trans_B, \\p compute_type, \\p stage or \\p alg is\n               currently not supported.\n\n  \\par Example\n  \\code{.c}\n   //     1 0 0 0\n   // A = 4 2 0 0\n   //     0 3 7 0\n   //     0 0 0 1\n   rocsparse_int m   = 4;\n   rocsparse_int n   = 2;\n\n   std::vector<int> hcsr_row_ptr = {0, 1, 3, 5, 6};\n   std::vector<int> hcsr_col_ind = {0, 0, 1, 1, 2, 3};\n   std::vector<float> hcsr_val   = {1, 4, 2, 3, 7, 1};\n   std::vector<float> hB(m * n);\n   std::vector<float> hC(m * n);\n\n   for(int i = 0; i < n; i++)\n   {\n       for(int j = 0; j < m; j++)\n       {\n           hB[m * i + j] = static_cast<float>(i + 1);\n       }\n   }\n\n   // Scalar alpha\n   float alpha = 1.0f;\n\n   rocsparse_int nnz = hcsr_row_ptr[m] - hcsr_row_ptr[0];\n\n   // Offload data to device\n   int* dcsr_row_ptr;\n   int* dcsr_col_ind;\n   float* dcsr_val;\n   float* dB;\n   float* dC;\n   hipMalloc((void**)&dcsr_row_ptr, sizeof(int) * (m + 1));\n   hipMalloc((void**)&dcsr_col_ind, sizeof(int) * nnz);\n   hipMalloc((void**)&dcsr_val, sizeof(float) * nnz);\n   hipMalloc((void**)&dB, sizeof(float) * m * n);\n   hipMalloc((void**)&dC, sizeof(float) * m * n);\n\n   hipMemcpy(dcsr_row_ptr, hcsr_row_ptr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_col_ind, hcsr_col_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_val, hcsr_val.data(), sizeof(float) * nnz, 
hipMemcpyHostToDevice);\n   hipMemcpy(dB, hB.data(), sizeof(float) * m * n, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spmat_descr matA;\n   rocsparse_dnmat_descr matB;\n   rocsparse_dnmat_descr matC;\n\n   rocsparse_indextype row_idx_type = rocsparse_indextype_i32;\n   rocsparse_indextype col_idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_datatype  compute_type = rocsparse_datatype_f32_r;\n   rocsparse_index_base idx_base = rocsparse_index_base_zero;\n   rocsparse_operation trans_A = rocsparse_operation_none;\n   rocsparse_operation trans_B = rocsparse_operation_none;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse matrix A\n   rocsparse_create_csr_descr(&matA,\n                              m,\n                              m,\n                              nnz,\n                              dcsr_row_ptr,\n                              dcsr_col_ind,\n                              dcsr_val,\n                              row_idx_type,\n                              col_idx_type,\n                              idx_base,\n                              data_type);\n\n   // Create dense matrix B\n   rocsparse_create_dnmat_descr(&matB,\n                                m,\n                                n,\n                                m,\n                                dB,\n                                data_type,\n                                rocsparse_order_column);\n\n   // Create dense matrix C\n   rocsparse_create_dnmat_descr(&matC,\n                                m,\n                                n,\n                                m,\n                                dC,\n                                data_type,\n                                rocsparse_order_column);\n\n   // Call spsv to get buffer size\n   size_t buffer_size;\n   rocsparse_spsm(handle,\n                  trans_A,\n                  trans_B,\n                  &alpha,\n 
                 matA,\n                  matB,\n                  matC,\n                  compute_type,\n                  rocsparse_spsm_alg_default,\n                  rocsparse_spsm_stage_buffer_size,\n                  &buffer_size,\n                  nullptr);\n\n   void* temp_buffer;\n   hipMalloc((void**)&temp_buffer, buffer_size);\n\n   // Call spsv to perform analysis\n   rocsparse_spsm(handle,\n                  trans_A,\n                  trans_B,\n                  &alpha,\n                  matA,\n                  matB,\n                  matC,\n                  compute_type,\n                  rocsparse_spsm_alg_default,\n                  rocsparse_spsm_stage_preprocess,\n                  &buffer_size,\n                  temp_buffer);\n\n   // Call spsv to perform computation\n   rocsparse_spsm(handle,\n                  trans_A,\n                  trans_B,\n                  &alpha,\n                  matA,\n                  matB,\n                  matC,\n                  compute_type,\n                  rocsparse_spsm_alg_default,\n                  rocsparse_spsm_stage_compute,\n                  &buffer_size,\n                  temp_buffer);\n\n   // Copy result back to host\n   hipMemcpy(hC.data(), dC, sizeof(float) * m * n, hipMemcpyDeviceToHost);\n\n   std::cout << \"hC\" << std::endl;\n   for(size_t i = 0; i < hC.size(); ++i)\n   {\n       std::cout << hC[i] << \" \";\n   }\n   std::cout << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_spmat_descr(matA);\n   rocsparse_destroy_dnmat_descr(matB);\n   rocsparse_destroy_dnmat_descr(matC);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dcsr_row_ptr);\n   hipFree(dcsr_col_ind);\n   hipFree(dcsr_val);\n   hipFree(dB);\n   hipFree(dC);\n   hipFree(temp_buffer);\n  \\endcode"]
+    pub fn rocsparse_spsm( // sparse triangular solve with a dense matrix RHS: op(A) * C = alpha * op(B)
+        handle: rocsparse_handle, // [in] rocSPARSE library context
+        trans_A: rocsparse_operation, // [in] operation on sparse A; docs list only none/transpose as supported
+        trans_B: rocsparse_operation, // [in] operation on dense B; docs list only none/transpose as supported
+        alpha: *const ::std::os::raw::c_void, // [in] scalar alpha, type-erased; the doc example passes a float with compute_type f32_r
+        matA: rocsparse_const_spmat_descr, // [in] sparse matrix descriptor (CSR or COO per the docs)
+        matB: rocsparse_const_dnmat_descr, // [in] dense right-hand-side matrix descriptor
+        matC: rocsparse_dnmat_descr, // [inout] dense solution matrix descriptor
+        compute_type: rocsparse_datatype, // [in] precision used for the SpSM computation
+        alg: rocsparse_spsm_alg, // [in] algorithm selector
+        stage: rocsparse_spsm_stage, // [in] one of the buffer_size / preprocess / compute stages
+        buffer_size: *mut usize, // [out] size of the temporary storage buffer in bytes
+        temp_buffer: *mut ::std::os::raw::c_void, // [in] caller-allocated scratch; when null only buffer_size is written
+    ) -> rocsparse_status; // item is #[must_use]: ignoring the status triggers a compiler warning
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse triangular solve\n\n  \\details\n  \\p rocsparse_spsv_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR or COO storage format, a dense solution vector\n  \\f$y\\f$ and the right-hand side \\f$x\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot y = \\alpha \\cdot x,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note SpSV requires three stages to complete. The first stage\n  \\ref rocsparse_spsv_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls. The second stage\n  \\ref rocsparse_spsv_stage_preprocess will preprocess data that would be saved in the temporary storage buffer.\n  In the final stage \\ref rocsparse_spsv_stage_compute, the actual computation is performed.\n\n  \\note\n  Only the \\ref rocsparse_spsv_stage_buffer_size stage and the \\ref rocsparse_spsv_stage_compute stage are non blocking\n  and executed asynchronously with respect to the host. They may return before the actual computation has finished.\n  The \\ref rocsparse_spsv_stage_preprocess stage is blocking with respect to the host.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none and \\p trans == \\ref rocsparse_operation_transpose is supported.\n\n  \\note\n  Only the \\ref rocsparse_spsv_stage_buffer_size stage and the \\ref rocsparse_spsv_stage_compute stage\n  support execution in a hipGraph context. 
The \\ref rocsparse_spsv_stage_preprocess stage does not support hipGraph.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans        matrix operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  mat          matrix descriptor.\n  @param[in]\n  x            vector descriptor.\n  @param[inout]\n  y            vector descriptor.\n  @param[in]\n  compute_type floating point precision for the SpSV computation.\n  @param[in]\n  alg          SpSV algorithm for the SpSV computation.\n  @param[in]\n  stage        SpSV stage for the SpSV computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpSV operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p mat, \\p x, \\p y or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_not_implemented \\p trans, \\p compute_type, \\p stage or \\p alg is\n               currently not supported.\n\n  \\par Example\n  \\code{.c}\n   //     1 0 0 0\n   // A = 4 2 0 0\n   //     0 3 7 0\n   //     0 0 0 1\n   rocsparse_int m   = 4;\n\n   std::vector<int> hcsr_row_ptr = {0, 1, 3, 5, 6};\n   std::vector<int> hcsr_col_ind = {0, 0, 1, 1, 2, 3};\n   std::vector<float> hcsr_val   = {1, 4, 2, 3, 7, 1};\n   std::vector<float> hx(m, 1.0f);\n   std::vector<float> hy(m, 0.0f);\n\n   // Scalar alpha\n   float alpha = 1.0f;\n\n   rocsparse_int nnz = hcsr_row_ptr[m] - hcsr_row_ptr[0];\n\n   // Offload data to device\n   int* dcsr_row_ptr;\n   int* dcsr_col_ind;\n   
float* dcsr_val;\n   float* dx;\n   float* dy;\n   hipMalloc((void**)&dcsr_row_ptr, sizeof(int) * (m + 1));\n   hipMalloc((void**)&dcsr_col_ind, sizeof(int) * nnz);\n   hipMalloc((void**)&dcsr_val, sizeof(float) * nnz);\n   hipMalloc((void**)&dx, sizeof(float) * m);\n   hipMalloc((void**)&dy, sizeof(float) * m);\n\n   hipMemcpy(dcsr_row_ptr, hcsr_row_ptr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_col_ind, hcsr_col_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dcsr_val, hcsr_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dx, hx.data(), sizeof(float) * m, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spmat_descr matA;\n   rocsparse_dnvec_descr vecX;\n   rocsparse_dnvec_descr vecY;\n\n   rocsparse_indextype row_idx_type = rocsparse_indextype_i32;\n   rocsparse_indextype col_idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_datatype  compute_type = rocsparse_datatype_f32_r;\n   rocsparse_index_base idx_base = rocsparse_index_base_zero;\n   rocsparse_operation trans = rocsparse_operation_none;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse matrix A\n   rocsparse_create_csr_descr(&matA,\n                              m,\n                              m,\n                              nnz,\n                              dcsr_row_ptr,\n                              dcsr_col_ind,\n                              dcsr_val,\n                              row_idx_type,\n                              col_idx_type,\n                              idx_base,\n                              data_type);\n\n   // Create dense vector X\n   rocsparse_create_dnvec_descr(&vecX,\n                                m,\n                                dx,\n                                data_type);\n\n   // Create dense vector Y\n   rocsparse_create_dnvec_descr(&vecY,\n                                m,\n             
                   dy,\n                                data_type);\n\n   // Call spsv to get buffer size\n   size_t buffer_size;\n   rocsparse_spsv(handle,\n                  trans,\n                  &alpha,\n                  matA,\n                  vecX,\n                  vecY,\n                  compute_type,\n                  rocsparse_spsv_alg_default,\n                  rocsparse_spsv_stage_buffer_size,\n                  &buffer_size,\n                  nullptr);\n\n   void* temp_buffer;\n   hipMalloc((void**)&temp_buffer, buffer_size);\n\n   // Call spsv to perform analysis\n   rocsparse_spsv(handle,\n                  trans,\n                  &alpha,\n                  matA,\n                  vecX,\n                  vecY,\n                  compute_type,\n                  rocsparse_spsv_alg_default,\n                  rocsparse_spsv_stage_preprocess,\n                  &buffer_size,\n                  temp_buffer);\n\n   // Call spsv to perform computation\n   rocsparse_spsv(handle,\n                  trans,\n                  &alpha,\n                  matA,\n                  vecX,\n                  vecY,\n                  compute_type,\n                  rocsparse_spsv_alg_default,\n                  rocsparse_spsv_stage_compute,\n                  &buffer_size,\n                  temp_buffer);\n\n   // Copy result back to host\n   hipMemcpy(hy.data(), dy, sizeof(float) * m, hipMemcpyDeviceToHost);\n\n   std::cout << \"hy\" << std::endl;\n   for(size_t i = 0; i < hy.size(); ++i)\n   {\n       std::cout << hy[i] << \" \";\n   }\n   std::cout << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_spmat_descr(matA);\n   rocsparse_destroy_dnvec_descr(vecX);\n   rocsparse_destroy_dnvec_descr(vecY);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dcsr_row_ptr);\n   hipFree(dcsr_col_ind);\n   hipFree(dcsr_val);\n   hipFree(dx);\n   hipFree(dy);\n   hipFree(temp_buffer);\n  \\endcode"]
+    pub fn rocsparse_spsv( // sparse triangular solve for a dense vector: op(A) * y = alpha * x
+        handle: rocsparse_handle, // [in] rocSPARSE library context
+        trans: rocsparse_operation, // [in] operation on A; docs list only none/transpose as supported
+        alpha: *const ::std::os::raw::c_void, // [in] scalar alpha, type-erased; the doc example passes a float with compute_type f32_r
+        mat: rocsparse_const_spmat_descr, // [in] sparse matrix descriptor (CSR or COO per the docs)
+        x: rocsparse_const_dnvec_descr, // [in] dense right-hand-side vector descriptor
+        y: rocsparse_dnvec_descr, // [inout] dense solution vector descriptor
+        compute_type: rocsparse_datatype, // [in] precision used for the SpSV computation
+        alg: rocsparse_spsv_alg, // [in] algorithm selector
+        stage: rocsparse_spsv_stage, // [in] one of the buffer_size / preprocess / compute stages
+        buffer_size: *mut usize, // [out] size of the temporary storage buffer in bytes
+        temp_buffer: *mut ::std::os::raw::c_void, // [in] caller-allocated scratch; when null only buffer_size is written
+    ) -> rocsparse_status; // item is #[must_use]: ignoring the status triggers a compiler warning
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup generic_module\n  \\brief Sparse vector inner dot product\n\n  \\details\n  \\ref rocsparse_spvv computes the inner dot product of the sparse vecotr \\f$x\\f$ with the\n  dense vector \\f$y\\f$, such that\n  \\f[\n    \\text{result} := x^{'} \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(x) = \\left\\{\n    \\begin{array}{ll}\n        x,   & \\text{if trans == rocsparse_operation_none} \\\\\n        \\bar{x}, & \\text{if trans == rocsparse_operation_conjugate_transpose} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\code{.c}\n      result = 0;\n      for(i = 0; i < nnz; ++i)\n      {\n          result += x_val[i] * y[x_ind[i]];\n      }\n  \\endcode\n\n  \\par Uniform Precisions:\n  <table>\n  <caption id=\"spvv_uniform\">Uniform Precisions</caption>\n  <tr><th>X / Y / compute_type\n  <tr><td>rocsparse_datatype_f32_r\n  <tr><td>rocsparse_datatype_f64_r\n  <tr><td>rocsparse_datatype_f32_c\n  <tr><td>rocsparse_datatype_f64_c\n  </table>\n\n  \\par Mixed precisions:\n  <table>\n  <caption id=\"spvv_mixed\">Mixed Precisions</caption>\n  <tr><th>X / Y                   <th>compute_type / result\n  <tr><td>rocsparse_datatype_i8_r <td>rocsparse_datatype_i32_r\n  <tr><td>rocsparse_datatype_i8_r <td>rocsparse_datatype_f32_r\n  </table>\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the SpVV operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans        sparse vector operation type.\n  @param[in]\n  x            sparse vector descriptor.\n  @param[in]\n  y            dense vector descriptor.\n  @param[out]\n  result       pointer to the result, can be host or device memory\n  @param[in]\n  compute_type floating point 
precision for the SpVV computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpVV operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p x, \\p y, \\p result or \\p buffer_size\n               pointer is invalid.\n  \\retval      rocsparse_status_not_implemented \\p compute_type is currently not\n               supported.\n\n  \\par Example\n  \\code{.c}\n   // Number of non-zeros of the sparse vector\n   int nnz = 3;\n\n   // Size of sparse and dense vector\n   int size = 9;\n\n   // Sparse index vector\n   std::vector<int> hx_ind = {0, 3, 5};\n\n   // Sparse value vector\n   std::vector<float> hx_val = {1.0f, 2.0f, 3.0f};\n\n   // Dense vector\n   std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};\n\n   // Offload data to device\n   int* dx_ind;\n   float* dx_val;\n   float* dy;\n   hipMalloc((void**)&dx_ind, sizeof(int) * nnz);\n   hipMalloc((void**)&dx_val, sizeof(float) * nnz);\n   hipMalloc((void**)&dy, sizeof(float) * size);\n\n   hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dx_val, hx_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spvec_descr vecX;\n   rocsparse_dnvec_descr vecY;\n\n   rocsparse_indextype idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_datatype  
compute_type = rocsparse_datatype_f32_r;\n   rocsparse_operation trans = rocsparse_operation_none;\n   rocsparse_index_base idx_base = rocsparse_index_base_zero;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse vector X\n   rocsparse_create_spvec_descr(&vecX,\n                                size,\n                                nnz,\n                                dx_ind,\n                                dx_val,\n                                idx_type,\n                                idx_base,\n                                data_type);\n\n   // Create dense vector Y\n   rocsparse_create_dnvec_descr(&vecY,\n                                size,\n                                dy,\n                                data_type);\n\n   // Obtain buffer size\n   float hresult = 0.0f;\n   size_t buffer_size;\n   rocsparse_spvv(handle,\n                  trans,\n                  vecX,\n                  vecY,\n                  &hresult,\n                  compute_type,\n                  &buffer_size,\n                  nullptr);\n\n   void* temp_buffer;\n   hipMalloc(&temp_buffer, buffer_size);\n\n   // SpVV\n   rocsparse_spvv(handle,\n                  trans,\n                  vecX,\n                  vecY,\n                  &hresult,\n                  compute_type,\n                  &buffer_size,\n                  temp_buffer);\n\n   hipDeviceSynchronize();\n\n   std::cout << \"hresult: \" << hresult << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_spvec_descr(vecX);\n   rocsparse_destroy_dnvec_descr(vecY);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dx_ind);\n   hipFree(dx_val);\n   hipFree(dy);\n   hipFree(temp_buffer);\n  \\endcode"]
+    pub fn rocsparse_spvv( // inner (dot) product of sparse vector x with dense vector y
+        handle: rocsparse_handle, // [in] rocSPARSE library context
+        trans: rocsparse_operation, // [in] operation on x; docs define none and conjugate_transpose
+        x: rocsparse_const_spvec_descr, // [in] sparse vector descriptor
+        y: rocsparse_const_dnvec_descr, // [in] dense vector descriptor
+        result: *mut ::std::os::raw::c_void, // [out] type-erased result; docs allow host or device memory
+        compute_type: rocsparse_datatype, // [in] precision used for the SpVV computation
+        buffer_size: *mut usize, // [out] bytes of temporary storage; set when temp_buffer is null
+        temp_buffer: *mut ::std::os::raw::c_void, // [in] caller-allocated scratch; null means only the size is reported
+    ) -> rocsparse_status; // docs: blocking w.r.t. the host, no hipGraph support; item is #[must_use]
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level1_module\n  \\brief Scale a sparse vector and add it to a dense vector.\n\n  \\details\n  \\p rocsparse_axpyi multiplies the sparse vector \\f$x\\f$ with scalar \\f$\\alpha\\f$ and\n  adds the result to the dense vector \\f$y\\f$, such that\n\n  \\f[\n      y := y + \\alpha \\cdot x\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          y[x_ind[i]] = y[x_ind[i]] + alpha * x_val[i];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of vector \\f$x\\f$.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  x_val       array of \\p nnz elements containing the values of \\f$x\\f$.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[inout]\n  y           array of values in dense format.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p alpha, \\p x_val, \\p x_ind or \\p y pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_saxpyi( // f32 entry point of axpyi: y[x_ind[i]] += alpha * x_val[i] for i in 0..nnz
+        handle: rocsparse_handle, // [in] rocSPARSE library context
+        nnz: rocsparse_int, // [in] number of non-zero entries of x
+        alpha: *const f32, // [in] scalar alpha
+        x_val: *const f32, // [in] nnz values of x
+        x_ind: *const rocsparse_int, // [in] nnz indices of the non-zero values of x
+        y: *mut f32, // [inout] dense vector, updated in place
+        idx_base: rocsparse_index_base, // [in] rocsparse_index_base_zero or rocsparse_index_base_one
+    ) -> rocsparse_status; // docs: non-blocking, may return before the computation has finished
+}
+extern "C" {
+    #[must_use] // ignoring the returned status triggers a compiler warning
+    pub fn rocsparse_daxpyi( // f64 counterpart of rocsparse_saxpyi (same parameter list, f64 elements)
+        handle: rocsparse_handle, // rocSPARSE library context
+        nnz: rocsparse_int, // presumably the non-zero count of x, as documented on rocsparse_saxpyi - TODO confirm
+        alpha: *const f64, // read-only scalar
+        x_val: *const f64, // read-only values; presumably nnz elements - TODO confirm
+        x_ind: *const rocsparse_int, // read-only indices; presumably nnz elements - TODO confirm
+        y: *mut f64, // mutable dense array; presumably updated in place like rocsparse_saxpyi
+        idx_base: rocsparse_index_base, // index base selector
+    ) -> rocsparse_status; // NOTE(review): no doc attribute of its own; the axpyi group docs sit on rocsparse_saxpyi
+}
+extern "C" {
+    #[must_use] // ignoring the returned status triggers a compiler warning
+    pub fn rocsparse_caxpyi( // rocsparse_float_complex counterpart of rocsparse_saxpyi (same parameter list)
+        handle: rocsparse_handle, // rocSPARSE library context
+        nnz: rocsparse_int, // presumably the non-zero count of x, as documented on rocsparse_saxpyi - TODO confirm
+        alpha: *const rocsparse_float_complex, // read-only scalar
+        x_val: *const rocsparse_float_complex, // read-only values; presumably nnz elements - TODO confirm
+        x_ind: *const rocsparse_int, // read-only indices; presumably nnz elements - TODO confirm
+        y: *mut rocsparse_float_complex, // mutable dense array; presumably updated in place like rocsparse_saxpyi
+        idx_base: rocsparse_index_base, // index base selector
+    ) -> rocsparse_status; // NOTE(review): no doc attribute of its own; the axpyi group docs sit on rocsparse_saxpyi
+}
+extern "C" {
+    #[must_use] // ignoring the returned status triggers a compiler warning
+    pub fn rocsparse_zaxpyi( // rocsparse_double_complex counterpart of rocsparse_saxpyi (same parameter list)
+        handle: rocsparse_handle, // rocSPARSE library context
+        nnz: rocsparse_int, // presumably the non-zero count of x, as documented on rocsparse_saxpyi - TODO confirm
+        alpha: *const rocsparse_double_complex, // read-only scalar
+        x_val: *const rocsparse_double_complex, // read-only values; presumably nnz elements - TODO confirm
+        x_ind: *const rocsparse_int, // read-only indices; presumably nnz elements - TODO confirm
+        y: *mut rocsparse_double_complex, // mutable dense array; presumably updated in place like rocsparse_saxpyi
+        idx_base: rocsparse_index_base, // index base selector
+    ) -> rocsparse_status; // NOTE(review): no doc attribute of its own; the axpyi group docs sit on rocsparse_saxpyi
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level1_module\n  \\brief Compute the dot product of a complex conjugate sparse vector with a dense\n  vector.\n\n  \\details\n  \\p rocsparse_dotci computes the dot product of the complex conjugate sparse vector\n  \\f$x\\f$ with the dense vector \\f$y\\f$, such that\n  \\f[\n    \\text{result} := \\bar{x}^H y\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          result += conj(x_val[i]) * y[x_ind[i]];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of vector \\f$x\\f$.\n  @param[in]\n  x_val       array of \\p nnz values.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[in]\n  y           array of values in dense format.\n  @param[out]\n  result      pointer to the result, can be host or device memory\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p x_val, \\p x_ind, \\p y or \\p result\n          pointer is invalid.\n  \\retval rocsparse_status_memory_error the buffer for the dot product reduction\n          could not be allocated.\n  \\retval rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    pub fn rocsparse_cdotci(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const rocsparse_float_complex,
+        x_ind: *const rocsparse_int,
+        y: *const rocsparse_float_complex,
+        result: *mut rocsparse_float_complex,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zdotci(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const rocsparse_double_complex,
+        x_ind: *const rocsparse_int,
+        y: *const rocsparse_double_complex,
+        result: *mut rocsparse_double_complex,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level1_module\n  \\brief Compute the dot product of a sparse vector with a dense vector.\n\n  \\details\n  \\p rocsparse_doti computes the dot product of the sparse vector \\f$x\\f$ with the\n  dense vector \\f$y\\f$, such that\n  \\f[\n    \\text{result} := y^T x\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          result += x_val[i] * y[x_ind[i]];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of vector \\f$x\\f$.\n  @param[in]\n  x_val       array of \\p nnz values.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[in]\n  y           array of values in dense format.\n  @param[out]\n  result      pointer to the result, can be host or device memory\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p x_val, \\p x_ind, \\p y or \\p result\n          pointer is invalid.\n  \\retval rocsparse_status_memory_error the buffer for the dot product reduction\n          could not be allocated.\n  \\retval rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    pub fn rocsparse_sdoti(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const f32,
+        x_ind: *const rocsparse_int,
+        y: *const f32,
+        result: *mut f32,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ddoti(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const f64,
+        x_ind: *const rocsparse_int,
+        y: *const f64,
+        result: *mut f64,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cdoti(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const rocsparse_float_complex,
+        x_ind: *const rocsparse_int,
+        y: *const rocsparse_float_complex,
+        result: *mut rocsparse_float_complex,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zdoti(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const rocsparse_double_complex,
+        x_ind: *const rocsparse_int,
+        y: *const rocsparse_double_complex,
+        result: *mut rocsparse_double_complex,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level1_module\n  \\brief Gather elements from a dense vector and store them into a sparse vector.\n\n  \\details\n  \\p rocsparse_gthr gathers the elements that are listed in \\p x_ind from the dense\n  vector \\f$y\\f$ and stores them in the sparse vector \\f$x\\f$.\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          x_val[i] = y[x_ind[i]];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of \\f$x\\f$.\n  @param[in]\n  y           array of values in dense format.\n  @param[out]\n  x_val       array of \\p nnz elements containing the values of \\f$x\\f$.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval     rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p y, \\p x_val or \\p x_ind pointer is\n              invalid.\n/\n/**@{"]
+    pub fn rocsparse_sgthr(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        y: *const f32,
+        x_val: *mut f32,
+        x_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dgthr(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        y: *const f64,
+        x_val: *mut f64,
+        x_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cgthr(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        y: *const rocsparse_float_complex,
+        x_val: *mut rocsparse_float_complex,
+        x_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zgthr(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        y: *const rocsparse_double_complex,
+        x_val: *mut rocsparse_double_complex,
+        x_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level1_module\n  \\brief Gather and zero out elements from a dense vector and store them into a sparse\n  vector.\n\n  \\details\n  \\p rocsparse_gthrz gathers the elements that are listed in \\p x_ind from the dense\n  vector \\f$y\\f$ and stores them in the sparse vector \\f$x\\f$. The gathered elements\n  in \\f$y\\f$ are replaced by zero.\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          x_val[i]    = y[x_ind[i]];\n          y[x_ind[i]] = 0;\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of \\f$x\\f$.\n  @param[inout]\n  y           array of values in dense format.\n  @param[out]\n  x_val       array of \\p nnz elements containing the non-zero values of \\f$x\\f$.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval     rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p y, \\p x_val or \\p x_ind pointer is\n              invalid.\n/\n/**@{"]
+    pub fn rocsparse_sgthrz(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        y: *mut f32,
+        x_val: *mut f32,
+        x_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dgthrz(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        y: *mut f64,
+        x_val: *mut f64,
+        x_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cgthrz(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        y: *mut rocsparse_float_complex,
+        x_val: *mut rocsparse_float_complex,
+        x_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zgthrz(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        y: *mut rocsparse_double_complex,
+        x_val: *mut rocsparse_double_complex,
+        x_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level1_module\n  \\brief Apply Givens rotation to a dense and a sparse vector.\n\n  \\details\n  \\p rocsparse_roti applies the Givens rotation matrix \\f$G\\f$ to the sparse vector\n  \\f$x\\f$ and the dense vector \\f$y\\f$, where\n  \\f[\n    G = \\begin{pmatrix} c & s \\\\ -s & c \\end{pmatrix}\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          x_tmp = x_val[i];\n          y_tmp = y[x_ind[i]];\n\n          x_val[i]    = c * x_tmp + s * y_tmp;\n          y[x_ind[i]] = c * y_tmp - s * x_tmp;\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of \\f$x\\f$.\n  @param[inout]\n  x_val       array of \\p nnz elements containing the non-zero values of \\f$x\\f$.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[inout]\n  y           array of values in dense format.\n  @param[in]\n  c           pointer to the cosine element of \\f$G\\f$, can be on host or device.\n  @param[in]\n  s           pointer to the sine element of \\f$G\\f$, can be on host or device.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval     rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p c, \\p s, \\p x_val, \\p x_ind or \\p y\n              pointer is invalid.\n/\n/**@{"]
+    pub fn rocsparse_sroti(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *mut f32,
+        x_ind: *const rocsparse_int,
+        y: *mut f32,
+        c: *const f32,
+        s: *const f32,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_droti(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *mut f64,
+        x_ind: *const rocsparse_int,
+        y: *mut f64,
+        c: *const f64,
+        s: *const f64,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level1_module\n  \\brief Scatter elements from a sparse vector into a dense vector.\n\n  \\details\n  \\p rocsparse_sctr scatters the elements that are listed in \\p x_ind from the sparse\n  vector \\f$x\\f$ into the dense vector \\f$y\\f$. Indices of \\f$y\\f$ that are not listed\n  in \\p x_ind remain unchanged.\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          y[x_ind[i]] = x_val[i];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  nnz         number of non-zero entries of \\f$x\\f$.\n  @param[in]\n  x_val       array of \\p nnz elements containing the non-zero values of \\f$x\\f$.\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices of the non-zero\n              values of \\f$x\\f$.\n  @param[inout]\n  y           array of values in dense format.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_value \\p idx_base is invalid.\n  \\retval     rocsparse_status_invalid_size \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p x_val, \\p x_ind or \\p y pointer is\n              invalid.\n/\n/**@{"]
+    pub fn rocsparse_ssctr(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const f32,
+        x_ind: *const rocsparse_int,
+        y: *mut f32,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dsctr(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const f64,
+        x_ind: *const rocsparse_int,
+        y: *mut f64,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_csctr(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const rocsparse_float_complex,
+        x_ind: *const rocsparse_int,
+        y: *mut rocsparse_float_complex,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zsctr(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const rocsparse_double_complex,
+        x_ind: *const rocsparse_int,
+        y: *mut rocsparse_double_complex,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_isctr(
+        handle: rocsparse_handle,
+        nnz: rocsparse_int,
+        x_val: *const rocsparse_int,
+        x_ind: *const rocsparse_int,
+        y: *mut rocsparse_int,
+        idx_base: rocsparse_index_base,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmv_ex_analysis performs the analysis step for rocsparse_sbsrmv(),\n  rocsparse_dbsrmv(), rocsparse_cbsrmv() and rocsparse_zbsrmv(). It is expected that\n  this function will be executed only once for a given matrix and particular operation\n  type. The gathered analysis meta data can be cleared by rocsparse_bsrmv_ex_clear().\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nb          number of block columns of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb or \\p nnzb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val, \\p bsr_row_ptr,\n              \\p bsr_col_ind or \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer for the gathered information\n              could not be allocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_sbsrmv_ex_analysis(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dbsrmv_ex_analysis(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cbsrmv_ex_analysis(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_float_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zbsrmv_ex_analysis(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_double_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmv_ex multiplies the scalar \\f$\\alpha\\f$ with a sparse\n  \\f$(mb \\cdot \\text{block_dim}) \\times (nb \\cdot \\text{block_dim})\\f$\n  matrix, defined in BSR storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none is supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nb          number of block columns of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[in]\n  x           array of \\p nb*block_dim elements (\\f$op(A) = A\\f$) or \\p mb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p mb*block_dim elements (\\f$op(A) = A\\f$) or \\p nb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb or \\p block_dim is\n              invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_sbsrmv_ex(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const f32,
+        beta: *const f32,
+        y: *mut f32,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dbsrmv_ex(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f64,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const f64,
+        beta: *const f64,
+        y: *mut f64,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cbsrmv_ex(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_float_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const rocsparse_float_complex,
+        beta: *const rocsparse_float_complex,
+        y: *mut rocsparse_float_complex,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zbsrmv_ex(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_double_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const rocsparse_double_complex,
+        beta: *const rocsparse_double_complex,
+        y: *mut rocsparse_double_complex,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmv_ex_clear deallocates all memory that was allocated by\n  rocsparse_sbsrmv_ex_analysis(), rocsparse_dbsrmv_ex_analysis(), rocsparse_cbsrmv_ex_analysis()\n  or rocsparse_zbsrmv_ex_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required anymore for further computation, e.g. when\n  switching to another sparse matrix format.\n\n  \\note\n  Calling \\p rocsparse_bsrmv_ex_clear is optional. All allocated resources will be\n  cleared, when the opaque \\ref rocsparse_mat_info struct is destroyed using\n  rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer for the gathered information\n              could not be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_bsrmv_ex_clear(
+        handle: rocsparse_handle,
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmv_analysis performs the analysis step for rocsparse_sbsrmv(),\n  rocsparse_dbsrmv(), rocsparse_cbsrmv() and rocsparse_zbsrmv(). It is expected that\n  this function will be executed only once for a given matrix and particular operation\n  type. The gathered analysis meta data can be cleared by rocsparse_bsrmv_clear().\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nb          number of block columns of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb or \\p nnzb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val, \\p bsr_row_ptr,\n              \\p bsr_col_ind or \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer for the gathered information\n              could not be allocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_sbsrmv_analysis(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dbsrmv_analysis(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" { // Single-precision complex counterpart of rocsparse_sbsrmv_analysis: same argument list, only the element type of `bsr_val` differs.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_cbsrmv_analysis(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nb: rocsparse_int, // number of block columns
+        nnzb: rocsparse_int, // number of non-zero blocks
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const rocsparse_float_complex, // nnzb blocks of rocsparse_float_complex values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // [out] receives the information collected by the analysis
+    ) -> rocsparse_status;
+}
+extern "C" { // Double-precision complex counterpart of rocsparse_sbsrmv_analysis: same argument list, only the element type of `bsr_val` differs.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_zbsrmv_analysis(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nb: rocsparse_int, // number of block columns
+        nnzb: rocsparse_int, // number of non-zero blocks
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const rocsparse_double_complex, // nnzb blocks of rocsparse_double_complex values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // [out] receives the information collected by the analysis
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmv multiplies the scalar \\f$\\alpha\\f$ with a sparse\n  \\f$(mb \\cdot \\text{block_dim}) \\times (nb \\cdot \\text{block_dim})\\f$\n  matrix, defined in BSR storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none is supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nb          number of block columns of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  x           array of \\p nb*block_dim elements (\\f$op(A) = A\\f$) or \\p mb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p mb*block_dim elements (\\f$op(A) = A\\f$) or \\p nb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb or \\p block_dim is\n              invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
+    pub fn rocsparse_sbsrmv(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const f32,
+        beta: *const f32,
+        y: *mut f32,
+    ) -> rocsparse_status;
+}
+extern "C" { // Double-precision counterpart of rocsparse_sbsrmv: same argument list with f64 scalars, block values and vectors.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_dbsrmv(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nb: rocsparse_int, // number of block columns
+        nnzb: rocsparse_int, // number of non-zero blocks
+        alpha: *const f64, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const f64, // nnzb blocks of f64 values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // information collected during the analysis step
+        x: *const f64, // dense input vector x
+        beta: *const f64, // scalar beta
+        y: *mut f64, // [inout] dense vector y, scaled by beta and accumulated into
+    ) -> rocsparse_status;
+}
+extern "C" { // Single-precision complex counterpart of rocsparse_sbsrmv: same argument list with rocsparse_float_complex scalars, block values and vectors.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_cbsrmv(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nb: rocsparse_int, // number of block columns
+        nnzb: rocsparse_int, // number of non-zero blocks
+        alpha: *const rocsparse_float_complex, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const rocsparse_float_complex, // nnzb blocks of complex values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // information collected during the analysis step
+        x: *const rocsparse_float_complex, // dense input vector x
+        beta: *const rocsparse_float_complex, // scalar beta
+        y: *mut rocsparse_float_complex, // [inout] dense vector y, scaled by beta and accumulated into
+    ) -> rocsparse_status;
+}
+extern "C" { // Double-precision complex counterpart of rocsparse_sbsrmv: same argument list with rocsparse_double_complex scalars, block values and vectors.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_zbsrmv(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nb: rocsparse_int, // number of block columns
+        nnzb: rocsparse_int, // number of non-zero blocks
+        alpha: *const rocsparse_double_complex, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const rocsparse_double_complex, // nnzb blocks of complex values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // information collected during the analysis step
+        x: *const rocsparse_double_complex, // dense input vector x
+        beta: *const rocsparse_double_complex, // scalar beta
+        y: *mut rocsparse_double_complex, // [inout] dense vector y, scaled by beta and accumulated into
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmv_clear deallocates all memory that was allocated by\n  rocsparse_sbsrmv_analysis(), rocsparse_dbsrmv_analysis(), rocsparse_cbsrmv_analysis()\n  or rocsparse_zbsrmv_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required anymore for further computation, e.g. when\n  switching to another sparse matrix format.\n\n  \\note\n  Calling \\p rocsparse_bsrmv_clear is optional. All allocated resources will be\n  cleared, when the opaque \\ref rocsparse_mat_info struct is destroyed using\n  rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer for the gathered information\n              could not be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_bsrmv_clear(
+        handle: rocsparse_handle, // rocsparse library context
+        info: rocsparse_mat_info, // [inout] analysis data whose bsrmv allocations are released
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsv_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_sbsrsv_solve(),\n  rocsparse_dbsrsv_solve(), rocsparse_cbsrsv_solve() or rocsparse_zbsrsv_solve()\n  computation. The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position,\n  using same index base as the BSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note \\p rocsparse_bsrsv_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
+    pub fn rocsparse_bsrsv_zero_pivot(
+        handle: rocsparse_handle, // rocsparse library context
+        info: rocsparse_mat_info, // information collected during the analysis step
+        position: *mut rocsparse_int, // [inout] first zero pivot index, or -1 when none was found; host or device memory
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsv_buffer_size returns the size of the temporary storage buffer that\n  is required by rocsparse_sbsrsv_analysis(), rocsparse_dbsrsv_analysis(),\n  rocsparse_cbsrsv_analysis(), rocsparse_zbsrsv_analysis(), rocsparse_sbsrsv_solve(),\n  rocsparse_dbsrsv_solve(), rocsparse_cbsrsv_solve() and rocsparse_zbsrsv_solve(). The\n  temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_sbsrsv_analysis(), rocsparse_dbsrsv_analysis(),\n              rocsparse_cbsrsv_analysis(), rocsparse_zbsrsv_analysis(),\n              rocsparse_sbsrsv_solve(), rocsparse_dbsrsv_solve(),\n              
rocsparse_cbsrsv_solve() and rocsparse_zbsrsv_solve().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val, \\p bsr_row_ptr,\n              \\p bsr_col_ind, \\p info or \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
+    pub fn rocsparse_sbsrsv_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // Double-precision counterpart of rocsparse_sbsrsv_buffer_size: same argument list, only the element type of `bsr_val` differs.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_dbsrsv_buffer_size(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nnzb: rocsparse_int, // number of non-zero blocks
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const f64, // nnzb blocks of f64 values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // analysis info structure
+        buffer_size: *mut usize, // [out] required temporary buffer size in bytes
+    ) -> rocsparse_status;
+}
+extern "C" { // Single-precision complex counterpart of rocsparse_sbsrsv_buffer_size: same argument list, only the element type of `bsr_val` differs.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_cbsrsv_buffer_size(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nnzb: rocsparse_int, // number of non-zero blocks
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const rocsparse_float_complex, // nnzb blocks of complex values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // analysis info structure
+        buffer_size: *mut usize, // [out] required temporary buffer size in bytes
+    ) -> rocsparse_status;
+}
+extern "C" { // Double-precision complex counterpart of rocsparse_sbsrsv_buffer_size: same argument list, only the element type of `bsr_val` differs.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_zbsrsv_buffer_size(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nnzb: rocsparse_int, // number of non-zero blocks
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const rocsparse_double_complex, // nnzb blocks of complex values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // analysis info structure
+        buffer_size: *mut usize, // [out] required temporary buffer size in bytes
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsv_analysis performs the analysis step for rocsparse_sbsrsv_solve(),\n  rocsparse_dbsrsv_solve(), rocsparse_cbsrsv_solve() and rocsparse_zbsrsv_solve(). It\n  is expected that this function will be executed only once for a given matrix and\n  particular operation type. The analysis meta data can be cleared by\n  rocsparse_bsrsv_clear().\n\n  \\p rocsparse_bsrsv_analysis can share its meta data with\n  rocsparse_sbsrsm_analysis(), rocsparse_dbsrsm_analysis(),\n  rocsparse_cbsrsm_analysis(), rocsparse_zbsrsm_analysis(),\n  rocsparse_sbsrilu0_analysis(), rocsparse_dbsrilu0_analysis(),\n  rocsparse_cbsrilu0_analysis(), rocsparse_zbsrilu0_analysis(),\n  rocsparse_sbsric0_analysis(), rocsparse_dbsric0_analysis(),\n  rocsparse_cbsric0_analysis() and rocsparse_zbsric0_analysis(). Selecting\n  \\ref rocsparse_analysis_policy_reuse policy can greatly improve computation\n  performance of meta data. However, the user needs to make sure that the sparsity\n  pattern remains unchanged. 
If this cannot be assured,\n  \\ref rocsparse_analysis_policy_force has to be used.\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[out]\n  info        structure that holds the information collected during\n              the analysis step.\n  @param[in]\n  analysis    \\ref rocsparse_analysis_policy_reuse or\n              \\ref rocsparse_analysis_policy_force.\n  @param[in]\n  solve       \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_row_ptr,\n              \\p bsr_col_ind, \\p info or \\p temp_buffer pointer is invalid.\n  \\retval     
rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
+    pub fn rocsparse_sbsrsv_analysis(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // Double-precision counterpart of rocsparse_sbsrsv_analysis: same argument list, only the element type of `bsr_val` differs.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_dbsrsv_analysis(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nnzb: rocsparse_int, // number of non-zero blocks
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const f64, // nnzb blocks of f64 values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // [out] receives the information collected by the analysis
+        analysis: rocsparse_analysis_policy, // rocsparse_analysis_policy_reuse or rocsparse_analysis_policy_force
+        solve: rocsparse_solve_policy, // rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // temporary storage buffer allocated by the user
+    ) -> rocsparse_status;
+}
+extern "C" { // Single-precision complex counterpart of rocsparse_sbsrsv_analysis: same argument list, only the element type of `bsr_val` differs.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_cbsrsv_analysis(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nnzb: rocsparse_int, // number of non-zero blocks
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const rocsparse_float_complex, // nnzb blocks of complex values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // [out] receives the information collected by the analysis
+        analysis: rocsparse_analysis_policy, // rocsparse_analysis_policy_reuse or rocsparse_analysis_policy_force
+        solve: rocsparse_solve_policy, // rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // temporary storage buffer allocated by the user
+    ) -> rocsparse_status;
+}
+extern "C" { // Double-precision complex counterpart of rocsparse_sbsrsv_analysis: same argument list, only the element type of `bsr_val` differs.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_zbsrsv_analysis(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nnzb: rocsparse_int, // number of non-zero blocks
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const rocsparse_double_complex, // nnzb blocks of complex values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // [out] receives the information collected by the analysis
+        analysis: rocsparse_analysis_policy, // rocsparse_analysis_policy_reuse or rocsparse_analysis_policy_force
+        solve: rocsparse_solve_policy, // rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // temporary storage buffer allocated by the user
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsv_clear deallocates all memory that was allocated by\n  rocsparse_sbsrsv_analysis(), rocsparse_dbsrsv_analysis(), rocsparse_cbsrsv_analysis()\n  or rocsparse_zbsrsv_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required for further computation, e.g. when switching to\n  another sparse matrix format. Calling \\p rocsparse_bsrsv_clear is optional. All\n  allocated resources will be cleared, when the opaque \\ref rocsparse_mat_info struct\n  is destroyed using rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer holding the meta data could not\n              be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_bsrsv_clear(
+        handle: rocsparse_handle, // rocsparse library context
+        info: rocsparse_mat_info, // [inout] analysis data whose bsrsv allocations are released
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsv_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in BSR storage format, a dense solution vector\n  \\f$y\\f$ and the right-hand side \\f$x\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot y = \\alpha \\cdot x,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_bsrsv_solve requires a user allocated temporary buffer. Its size is\n  returned by rocsparse_sbsrsv_buffer_size(), rocsparse_dbsrsv_buffer_size(),\n  rocsparse_cbsrsv_buffer_size() or rocsparse_zbsrsv_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_sbsrsv_analysis(),\n  rocsparse_dbsrsv_analysis(), rocsparse_cbsrsv_analysis() or\n  rocsparse_zbsrsv_analysis(). \\p rocsparse_bsrsv_solve reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be checked calling\n  rocsparse_bsrsv_zero_pivot(). 
If\n  \\ref rocsparse_diag_type == \\ref rocsparse_diag_type_unit, no zero pivot will be\n  reported, even if \\f$A_{j,j} = 0\\f$ for some \\f$j\\f$.\n\n  \\note\n  The sparse BSR matrix has to be sorted.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none and\n  \\p trans == \\ref rocsparse_operation_transpose are supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  x           array of \\p m elements, holding the right-hand side.\n  @param[out]\n  y           array of \\p m elements, holding the solution.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the 
library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p x or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the lower triangular \\f$m \\times m\\f$ matrix \\f$L\\f$, stored in BSR\n  storage format with unit diagonal. The following example solves \\f$L \\cdot y = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor\n      rocsparse_mat_descr descr;\n      rocsparse_create_mat_descr(&descr);\n      rocsparse_set_mat_fill_mode(descr, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr, rocsparse_diag_type_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size;\n      rocsparse_dbsrsv_buffer_size(handle,\n                                   rocsparse_direction_column,\n                                   rocsparse_operation_none,\n                                   mb,\n                                   nnzb,\n                                   descr,\n                                   bsr_val,\n                                   bsr_row_ptr,\n                                   bsr_col_ind,\n                                   block_dim,\n                                   info,\n                                   &buffer_size);\n\n      // 
Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis step\n      rocsparse_dbsrsv_analysis(handle,\n                                rocsparse_direction_column,\n                                rocsparse_operation_none,\n                                mb,\n                                nnzb,\n                                descr,\n                                bsr_val,\n                                bsr_row_ptr,\n                                bsr_col_ind,\n                                block_dim,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Solve Ly = x\n      rocsparse_dbsrsv_solve(handle,\n                             rocsparse_direction_column,\n                             rocsparse_operation_none,\n                             mb,\n                             nnzb,\n                             &alpha,\n                             descr,\n                             bsr_val,\n                             bsr_row_ptr,\n                             bsr_col_ind,\n                             block_dim,\n                             info,\n                             x,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // No zero pivot should be found, with L having unit diagonal\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr);\n      rocsparse_destroy_handle(handle);\n  \\endcode"]
+    pub fn rocsparse_sbsrsv_solve(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const f32,
+        y: *mut f32,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // Double-precision counterpart of rocsparse_sbsrsv_solve: same argument list with f64 scalar, block values and vectors.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_dbsrsv_solve(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nnzb: rocsparse_int, // number of non-zero blocks
+        alpha: *const f64, // scalar alpha applied to the right-hand side
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const f64, // nnzb blocks of f64 values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // information collected during the analysis step
+        x: *const f64, // right-hand side
+        y: *mut f64, // [out] solution
+        policy: rocsparse_solve_policy, // rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // temporary storage buffer allocated by the user
+    ) -> rocsparse_status;
+}
+extern "C" { // Single-precision complex counterpart of rocsparse_sbsrsv_solve: same argument list with rocsparse_float_complex scalar, block values and vectors.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_cbsrsv_solve(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nnzb: rocsparse_int, // number of non-zero blocks
+        alpha: *const rocsparse_float_complex, // scalar alpha applied to the right-hand side
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const rocsparse_float_complex, // nnzb blocks of complex values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // information collected during the analysis step
+        x: *const rocsparse_float_complex, // right-hand side
+        y: *mut rocsparse_float_complex, // [out] solution
+        policy: rocsparse_solve_policy, // rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // temporary storage buffer allocated by the user
+    ) -> rocsparse_status;
+}
+extern "C" { // Double-precision complex counterpart of rocsparse_sbsrsv_solve: same argument list with rocsparse_double_complex scalar, block values and vectors.
+    #[must_use] // the compiler warns when the returned rocsparse_status is ignored
+    pub fn rocsparse_zbsrsv_solve(
+        handle: rocsparse_handle, // rocsparse library context
+        dir: rocsparse_direction, // storage order of the BSR blocks
+        trans: rocsparse_operation, // matrix operation type
+        mb: rocsparse_int, // number of block rows
+        nnzb: rocsparse_int, // number of non-zero blocks
+        alpha: *const rocsparse_double_complex, // scalar alpha applied to the right-hand side
+        descr: rocsparse_mat_descr, // descriptor of the sparse BSR matrix
+        bsr_val: *const rocsparse_double_complex, // nnzb blocks of complex values
+        bsr_row_ptr: *const rocsparse_int, // mb+1 offsets to the start of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // block dimension
+        info: rocsparse_mat_info, // information collected during the analysis step
+        x: *const rocsparse_double_complex, // right-hand side
+        y: *mut rocsparse_double_complex, // [out] solution
+        policy: rocsparse_solve_policy, // rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // temporary storage buffer allocated by the user
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication with mask operation using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrxmv multiplies the scalar \\f$\\alpha\\f$ with a sparse\n  \\f$(mb \\cdot \\text{block_dim}) \\times (nb \\cdot \\text{block_dim})\\f$\n  modified matrix, defined in BSR storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\left( \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y \\right)\\left( \\text{mask} \\right),\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  The \\f$\\text{mask}\\f$ is defined as an array of block row indices.\n  The input sparse matrix is defined with a modified BSR storage format where the beginning and the end of each row\n  is defined with two arrays, \\p bsr_row_ptr and \\p bsr_end_ptr (both of size \\p mb), rather the usual \\p bsr_row_ptr of size \\p mb+1.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none is supported.\n  Currently, \\p block_dim==1 is not supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  size_of_mask number of updated block rows of the array \\p y.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nb 
         number of block columns of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n\n  @param[in]\n  bsr_mask_ptr array of \\p size_of_mask elements that give the indices of the updated block rows.\n\n  @param[in]\n  bsr_row_ptr array of \\p mb elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_end_ptr array of \\p mb elements that point to the end of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim     block dimension of the sparse BSR matrix.\n  @param[in]\n  x           array of \\p nb*block_dim elements (\\f$op(A) = A\\f$) or \\p mb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p mb*block_dim elements (\\f$op(A) = A\\f$) or \\p nb*block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb, \\p block_dim or \\p size_of_mask is\n              invalid.\n  \\retval     rocsparse_status_invalid_value \\p size_of_mask is greater than \\p mb.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ind, \\p bsr_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  
\\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p block_dim==1, \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"] // NOTE(review): the trailing "/" and "/**@{" are Doxygen group-open residue from the C header, so this text also describes the d/c/z variants that follow. The retval list names "bsr_row_ind", which is not a parameter here; presumably bsr_row_ptr, confirm upstream.
+    pub fn rocsparse_sbsrxmv( // f32 variant of the bsrxmv family
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        size_of_mask: rocsparse_int,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_mask_ptr: *const rocsparse_int,
+        bsr_row_ptr: *const rocsparse_int, // mb entries here, not the usual mb+1 (see doc above)
+        bsr_end_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        x: *const f32,
+        beta: *const f32,
+        y: *mut f32, // [inout]: read and updated
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_dbsrxmv( // f64 variant of rocsparse_sbsrxmv; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        dir: rocsparse_direction, // matrix storage of BSR blocks
+        trans: rocsparse_operation, // the s-variant doc states only rocsparse_operation_none is supported
+        size_of_mask: rocsparse_int, // number of updated block rows of y
+        mb: rocsparse_int, // number of block rows
+        nb: rocsparse_int, // number of block columns
+        nnzb: rocsparse_int, // number of non-zero blocks
+        alpha: *const f64, // scalar alpha
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f64, // nnzb blocks of the BSR matrix
+        bsr_mask_ptr: *const rocsparse_int, // size_of_mask indices of the updated block rows
+        bsr_row_ptr: *const rocsparse_int, // mb entries: start of every block row
+        bsr_end_ptr: *const rocsparse_int, // mb entries: end of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // the s-variant doc states block_dim == 1 is not supported
+        x: *const f64, // dense input vector
+        beta: *const f64, // scalar beta
+        y: *mut f64, // dense vector, read and updated ([inout] in the s-variant doc)
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_cbsrxmv( // rocsparse_float_complex variant of rocsparse_sbsrxmv; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        dir: rocsparse_direction, // matrix storage of BSR blocks
+        trans: rocsparse_operation, // the s-variant doc states only rocsparse_operation_none is supported
+        size_of_mask: rocsparse_int, // number of updated block rows of y
+        mb: rocsparse_int, // number of block rows
+        nb: rocsparse_int, // number of block columns
+        nnzb: rocsparse_int, // number of non-zero blocks
+        alpha: *const rocsparse_float_complex, // scalar alpha
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_float_complex, // nnzb blocks of the BSR matrix
+        bsr_mask_ptr: *const rocsparse_int, // size_of_mask indices of the updated block rows
+        bsr_row_ptr: *const rocsparse_int, // mb entries: start of every block row
+        bsr_end_ptr: *const rocsparse_int, // mb entries: end of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // the s-variant doc states block_dim == 1 is not supported
+        x: *const rocsparse_float_complex, // dense input vector
+        beta: *const rocsparse_float_complex, // scalar beta
+        y: *mut rocsparse_float_complex, // dense vector, read and updated ([inout] in the s-variant doc)
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_zbsrxmv( // rocsparse_double_complex variant of rocsparse_sbsrxmv; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        dir: rocsparse_direction, // matrix storage of BSR blocks
+        trans: rocsparse_operation, // the s-variant doc states only rocsparse_operation_none is supported
+        size_of_mask: rocsparse_int, // number of updated block rows of y
+        mb: rocsparse_int, // number of block rows
+        nb: rocsparse_int, // number of block columns
+        nnzb: rocsparse_int, // number of non-zero blocks
+        alpha: *const rocsparse_double_complex, // scalar alpha
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_double_complex, // nnzb blocks of the BSR matrix
+        bsr_mask_ptr: *const rocsparse_int, // size_of_mask indices of the updated block rows
+        bsr_row_ptr: *const rocsparse_int, // mb entries: start of every block row
+        bsr_end_ptr: *const rocsparse_int, // mb entries: end of every block row
+        bsr_col_ind: *const rocsparse_int, // nnzb block column indices
+        block_dim: rocsparse_int, // the s-variant doc states block_dim == 1 is not supported
+        x: *const rocsparse_double_complex, // dense input vector
+        beta: *const rocsparse_double_complex, // scalar beta
+        y: *mut rocsparse_double_complex, // dense vector, read and updated ([inout] in the s-variant doc)
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using COO storage format\n\n  \\details\n  \\p rocsparse_coomv multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix, defined in COO storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  The COO matrix has to be sorted by row indices. This can be achieved by using\n  rocsparse_coosort_by_row().\n\n  \\code{.c}\n      for(i = 0; i < m; ++i)\n      {\n          y[i] = beta * y[i];\n      }\n\n      for(i = 0; i < nnz; ++i)\n      {\n          y[coo_row_ind[i]] += alpha * coo_val[i] * x[coo_col_ind[i]];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse COO matrix.\n  @param[in]\n  n           number of columns of the sparse COO matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse COO matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse COO matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  coo_val     array of \\p nnz elements of the sparse COO matrix.\n  @param[in]\n  coo_row_ind array of \\p nnz elements containing the row indices of the sparse COO\n              matrix.\n  @param[in]\n  coo_col_ind array of \\p nnz elements containing the column indices of the sparse\n              COO matrix.\n  @param[in]\n  x           array of \\p n elements (\\f$op(A) = A\\f$) or \\p m elements\n              (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p m elements (\\f$op(A) = A\\f$) or \\p n elements\n              (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p coo_val,\n              \\p coo_row_ind, \\p coo_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"] // NOTE(review): the trailing "/" and "/**@{" are Doxygen group-open residue from the C header, so this text also describes the d/c/z variants that follow.
+    pub fn rocsparse_scoomv( // f32 variant of the coomv family
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        coo_val: *const f32,
+        coo_row_ind: *const rocsparse_int, // doc above requires the matrix to be sorted by row index
+        coo_col_ind: *const rocsparse_int,
+        x: *const f32,
+        beta: *const f32,
+        y: *mut f32, // [inout]: read and updated
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_dcoomv( // f64 variant of rocsparse_scoomv; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        trans: rocsparse_operation, // matrix operation type
+        m: rocsparse_int, // number of rows
+        n: rocsparse_int, // number of columns
+        nnz: rocsparse_int, // number of non-zero entries
+        alpha: *const f64, // scalar alpha
+        descr: rocsparse_mat_descr, // the s-variant doc states only rocsparse_matrix_type_general is supported
+        coo_val: *const f64, // nnz matrix elements
+        coo_row_ind: *const rocsparse_int, // nnz row indices; the s-variant doc requires sorting by row
+        coo_col_ind: *const rocsparse_int, // nnz column indices
+        x: *const f64, // n elements for op(A) = A, otherwise m
+        beta: *const f64, // scalar beta
+        y: *mut f64, // m elements for op(A) = A, otherwise n; read and updated
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_ccoomv( // rocsparse_float_complex variant of rocsparse_scoomv; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        trans: rocsparse_operation, // matrix operation type
+        m: rocsparse_int, // number of rows
+        n: rocsparse_int, // number of columns
+        nnz: rocsparse_int, // number of non-zero entries
+        alpha: *const rocsparse_float_complex, // scalar alpha
+        descr: rocsparse_mat_descr, // the s-variant doc states only rocsparse_matrix_type_general is supported
+        coo_val: *const rocsparse_float_complex, // nnz matrix elements
+        coo_row_ind: *const rocsparse_int, // nnz row indices; the s-variant doc requires sorting by row
+        coo_col_ind: *const rocsparse_int, // nnz column indices
+        x: *const rocsparse_float_complex, // n elements for op(A) = A, otherwise m
+        beta: *const rocsparse_float_complex, // scalar beta
+        y: *mut rocsparse_float_complex, // m elements for op(A) = A, otherwise n; read and updated
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_zcoomv( // rocsparse_double_complex variant of rocsparse_scoomv; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        trans: rocsparse_operation, // matrix operation type
+        m: rocsparse_int, // number of rows
+        n: rocsparse_int, // number of columns
+        nnz: rocsparse_int, // number of non-zero entries
+        alpha: *const rocsparse_double_complex, // scalar alpha
+        descr: rocsparse_mat_descr, // the s-variant doc states only rocsparse_matrix_type_general is supported
+        coo_val: *const rocsparse_double_complex, // nnz matrix elements
+        coo_row_ind: *const rocsparse_int, // nnz row indices; the s-variant doc requires sorting by row
+        coo_col_ind: *const rocsparse_int, // nnz column indices
+        x: *const rocsparse_double_complex, // n elements for op(A) = A, otherwise m
+        beta: *const rocsparse_double_complex, // scalar beta
+        y: *mut rocsparse_double_complex, // m elements for op(A) = A, otherwise n; read and updated
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse iterative triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csritsv_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_csritsv_solve() and or rocsparse_csritsv_analysis(),\n  execution. The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position,\n  using same index base as the CSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note \\p rocsparse_csritsv_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
+    pub fn rocsparse_csritsv_zero_pivot( // not type-specific: one entry point, no s/d/c/z prefix
+        handle: rocsparse_handle,
+        descr: rocsparse_mat_descr,
+        info: rocsparse_mat_info,
+        position: *mut rocsparse_int, // receives the first zero-pivot index, or -1 when none was found (per the doc above)
+    ) -> rocsparse_status; // rocsparse_status_zero_pivot is the "found" outcome, so callers must distinguish it from real errors
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse iterative triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csritsv_buffer_size returns the size of the temporary storage buffer that\n  is required by rocsparse_scsritsv_analysis(), rocsparse_dcsritsv_analysis(),\n  rocsparse_ccsritsv_analysis(), rocsparse_zcsritsv_analysis(), rocsparse_scsritsv_solve(),\n  rocsparse_dcsritsv_solve(), rocsparse_ccsritsv_solve() and rocsparse_zcsritsv_solve(). The\n  temporary storage buffer must be allocated by the user.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scsritsv_analysis(), rocsparse_dcsritsv_analysis(),\n              rocsparse_ccsritsv_analysis(), rocsparse_zcsritsv_analysis(),\n              rocsparse_scsritsv_solve(), rocsparse_dcsritsv_solve(),\n              rocsparse_ccsritsv_solve() and rocsparse_zcsritsv_solve().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     
rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p info or \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general and \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_triangular.\n/\n/**@{"] // NOTE(review): the trailing "/" and "/**@{" are Doxygen group-open residue from the C header, so this text also describes the d/c/z variants that follow.
+    pub fn rocsparse_scsritsv_buffer_size( // f32 variant of the csritsv_buffer_size family
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize, // receives the required size in bytes; the caller allocates the buffer
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_dcsritsv_buffer_size( // f64 variant of rocsparse_scsritsv_buffer_size; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        trans: rocsparse_operation, // matrix operation type
+        m: rocsparse_int, // number of rows
+        nnz: rocsparse_int, // number of non-zero entries
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64, // nnz matrix elements
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets
+        csr_col_ind: *const rocsparse_int, // nnz column indices
+        info: rocsparse_mat_info, // marked [out] in the s-variant doc
+        buffer_size: *mut usize, // receives the required temporary-buffer size in bytes
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_ccsritsv_buffer_size( // rocsparse_float_complex variant of rocsparse_scsritsv_buffer_size; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        trans: rocsparse_operation, // matrix operation type
+        m: rocsparse_int, // number of rows
+        nnz: rocsparse_int, // number of non-zero entries
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex, // nnz matrix elements
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets
+        csr_col_ind: *const rocsparse_int, // nnz column indices
+        info: rocsparse_mat_info, // marked [out] in the s-variant doc
+        buffer_size: *mut usize, // receives the required temporary-buffer size in bytes
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_zcsritsv_buffer_size( // rocsparse_double_complex variant of rocsparse_scsritsv_buffer_size; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        trans: rocsparse_operation, // matrix operation type
+        m: rocsparse_int, // number of rows
+        nnz: rocsparse_int, // number of non-zero entries
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex, // nnz matrix elements
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets
+        csr_col_ind: *const rocsparse_int, // nnz column indices
+        info: rocsparse_mat_info, // marked [out] in the s-variant doc
+        buffer_size: *mut usize, // receives the required temporary-buffer size in bytes
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse iterative triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csritsv_analysis performs the analysis step for rocsparse_scsritsv_solve(),\n  rocsparse_dcsritsv_solve(), rocsparse_ccsritsv_solve() and rocsparse_zcsritsv_solve(). It\n  is expected that this function will be executed only once for a given matrix and\n  particular operation type. The analysis meta data can be cleared by\n  rocsparse_csritsv_clear().\n\n   Selecting\n  \\ref rocsparse_analysis_policy_reuse policy can greatly improve computation\n  performance of meta data. However, the user need to make sure that the sparsity\n  pattern remains unchanged. If this cannot be assured,\n  \\ref rocsparse_analysis_policy_force has to be used.\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during\n              the analysis step.\n  @param[in]\n  analysis    \\ref rocsparse_analysis_policy_reuse or\n              \\ref rocsparse_analysis_policy_force.\n  @param[in]\n  solve       \\ref 
rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p info or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general and \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_triangular.\n/\n/**@{"] // NOTE(review): the trailing "/" and "/**@{" are Doxygen group-open residue from the C header, so this text also describes the d/c/z variants that follow.
+    pub fn rocsparse_scsritsv_analysis( // f32 variant of the csritsv_analysis family
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated; size comes from rocsparse_scsritsv_buffer_size per its doc
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_dcsritsv_analysis( // f64 variant of rocsparse_scsritsv_analysis; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        trans: rocsparse_operation, // matrix operation type
+        m: rocsparse_int, // number of rows
+        nnz: rocsparse_int, // number of non-zero entries
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64, // nnz matrix elements
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets
+        csr_col_ind: *const rocsparse_int, // nnz column indices
+        info: rocsparse_mat_info, // receives the analysis data ([out] in the s-variant doc)
+        analysis: rocsparse_analysis_policy, // rocsparse_analysis_policy_reuse or rocsparse_analysis_policy_force
+        solve: rocsparse_solve_policy, // the s-variant doc lists rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // temporary storage allocated by the user
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_ccsritsv_analysis( // rocsparse_float_complex variant of rocsparse_scsritsv_analysis; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        trans: rocsparse_operation, // matrix operation type
+        m: rocsparse_int, // number of rows
+        nnz: rocsparse_int, // number of non-zero entries
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex, // nnz matrix elements
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets
+        csr_col_ind: *const rocsparse_int, // nnz column indices
+        info: rocsparse_mat_info, // receives the analysis data ([out] in the s-variant doc)
+        analysis: rocsparse_analysis_policy, // rocsparse_analysis_policy_reuse or rocsparse_analysis_policy_force
+        solve: rocsparse_solve_policy, // the s-variant doc lists rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // temporary storage allocated by the user
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    pub fn rocsparse_zcsritsv_analysis( // rocsparse_double_complex variant of rocsparse_scsritsv_analysis; same parameter list, documented on that declaration
+        handle: rocsparse_handle,
+        trans: rocsparse_operation, // matrix operation type
+        m: rocsparse_int, // number of rows
+        nnz: rocsparse_int, // number of non-zero entries
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex, // nnz matrix elements
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets
+        csr_col_ind: *const rocsparse_int, // nnz column indices
+        info: rocsparse_mat_info, // receives the analysis data ([out] in the s-variant doc)
+        analysis: rocsparse_analysis_policy, // rocsparse_analysis_policy_reuse or rocsparse_analysis_policy_force
+        solve: rocsparse_solve_policy, // the s-variant doc lists rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // temporary storage allocated by the user
+    ) -> rocsparse_status;
+}
+extern "C" { // Declaration only: the symbol is provided by the external C library at link time.
+    #[must_use] // The compiler warns when the returned rocsparse_status is dropped.
+    #[doc = "@}*/\n/*! \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csritsv_clear deallocates all memory that was allocated by\n  rocsparse_scsritsv_analysis(), rocsparse_dcsritsv_analysis(), rocsparse_ccsritsv_analysis()\n  or rocsparse_zcsritsv_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required for further computation, e.g. when switching to\n  another sparse matrix format. Calling \\p rocsparse_csritsv_clear is optional. All\n  allocated resources will be cleared, when the opaque \\ref rocsparse_mat_info struct\n  is destroyed using rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[inout]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer holding the meta data could not\n              be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."] // NOTE(review): the leading "@}*/" and "/*!" are Doxygen group-close residue from the C header. The brief omits "iterative", unlike the other csritsv docs; presumably an upstream slip.
+    pub fn rocsparse_csritsv_clear( // not type-specific: one entry point, no s/d/c/z prefix
+        handle: rocsparse_handle,
+        descr: rocsparse_mat_descr,
+        info: rocsparse_mat_info, // [inout]: analysis data held here is released
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse iterative triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csritsv_solve solves iteratively with the use of the Jacobi method a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR storage format, a dense solution vector\n  \\f$y\\f$ and the right-hand side \\f$x\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot y = \\alpha \\cdot x,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_csritsv_solve requires a user allocated temporary buffer. Its size is\n  returned by rocsparse_scsritsv_buffer_size(), rocsparse_dcsritsv_buffer_size(),\n  rocsparse_ccsritsv_buffer_size() or rocsparse_zcsritsv_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_scsritsv_analysis(),\n  rocsparse_dcsritsv_analysis(), rocsparse_ccsritsv_analysis() or\n  rocsparse_zcsritsv_analysis(). \\p rocsparse_csritsv_solve reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be checked calling\n  rocsparse_csritsv_zero_pivot(). If\n  \\ref rocsparse_diag_type == \\ref rocsparse_diag_type_unit, no zero pivot will be\n  reported, even if \\f$A_{j,j} = 0\\f$ for some \\f$j\\f$.\n\n  \\note\n  The sparse CSR matrix has to be sorted. 
This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  host_nmaxiter     maximum number of iteration on input and maximum number of iteration on output.\n  @param[in]\n  host_tol          if the pointer is null then loop will execute \\p nmaxiter[0] iterations.\n  @param[out]\n  host_history      (optional, record history)\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  x           array of \\p m elements, holding the right-hand side.\n  @param[out]\n  y           array of \\p m elements, holding the solution.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p csr_val,\n              \\p csr_row_ptr, \\p 
csr_col_ind, \\p x or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general and \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_triangular.\n\n  \\par Example\n  Consider the lower triangular \\f$m \\times m\\f$ matrix \\f$L\\f$, stored in CSR\n  storage format with unit diagonal. The following example solves \\f$L \\cdot y = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor\n      rocsparse_mat_descr descr;\n      rocsparse_create_mat_descr(&descr);\n      rocsparse_set_mat_fill_mode(descr, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr, rocsparse_diag_type_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size;\n      rocsparse_dcsritsv_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nnz,\n                                   descr,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n                                   &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis step\n      rocsparse_dcsritsv_analysis(handle,\n                                rocsparse_operation_none,\n                                m,\n                                nnz,\n                                descr,\n                 
               csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Solve Ly = x\n      rocsparse_int nmaxiter = 200;\n      rocsparse_int maxiter = nmaxiter;\n      tol = 1.0e-4;\n      history[200];\n      rocsparse_dcsritsv_solve(handle,\n                             &maxiter,\n                             &tol,\n                             history,\n                             rocsparse_operation_none,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             x,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      if (maxiter < nmaxiter) {} // convergence\n      else {} // non converged\n      for (int i=0;i<maxiter;++i) printf(\"iter = %d, max residual=%e\\n\", iter, history[i]);\n      // No zero pivot should be found, with L having unit diagonal\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsritsv_solve(
+        handle: rocsparse_handle,
+        host_nmaxiter: *mut rocsparse_int,
+        host_tol: *const f32,
+        host_history: *mut f32,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const f32,
+        y: *mut f32,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 entry point of the csritsv_solve family; it has no doc attribute of its own - the text attached to rocsparse_scsritsv_solve above presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsritsv_solve(
+        handle: rocsparse_handle,
+        host_nmaxiter: *mut rocsparse_int, // mutable: the doc example above passes &maxiter and compares it with the requested limit after the call
+        host_tol: *const f64,
+        host_history: *mut f64, // mutable: the doc example above reads history[i] after the call
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f64,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const f64,
+        y: *mut f64,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // single-precision complex entry point of the csritsv_solve family; it has no doc attribute of its own - the text attached to rocsparse_scsritsv_solve above presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_ccsritsv_solve(
+        handle: rocsparse_handle,
+        host_nmaxiter: *mut rocsparse_int,
+        host_tol: *const f32, // real-valued f32 even though the matrix and vector data below are rocsparse_float_complex
+        host_history: *mut f32, // likewise real-valued f32
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const rocsparse_float_complex,
+        y: *mut rocsparse_float_complex,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // double-precision complex entry point of the csritsv_solve family; it has no doc attribute of its own - the text attached to rocsparse_scsritsv_solve above presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_zcsritsv_solve(
+        handle: rocsparse_handle,
+        host_nmaxiter: *mut rocsparse_int,
+        host_tol: *const f64, // real-valued f64 even though the matrix and vector data below are rocsparse_double_complex
+        host_history: *mut f64, // likewise real-valued f64
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const rocsparse_double_complex,
+        y: *mut rocsparse_double_complex,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // f32 entry point of the csrmv analysis step; the doc attribute below describes the whole s/d/c/z family
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrmv_analysis performs the analysis step for rocsparse_scsrmv(),\n  rocsparse_dcsrmv(), rocsparse_ccsrmv() and rocsparse_zcsrmv(). It is expected that\n  this function will be executed only once for a given matrix and particular operation\n  type. The gathered analysis meta data can be cleared by rocsparse_csrmv_clear().\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr,\n              \\p csr_col_ind or \\p info pointer is invalid.\n  \\retval     
rocsparse_status_memory_error the buffer for the gathered information\n              could not be allocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented if \\ref rocsparse_matrix_type is not one of\n              \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric, or\n              \\ref rocsparse_matrix_type_triangular.\n/\n/**@{"]
+    pub fn rocsparse_scsrmv_analysis(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info, // documented above as an output yet passed by value - presumably an opaque handle type; confirm against its declaration
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 entry point of the csrmv analysis step; it has no doc attribute of its own - the text on rocsparse_scsrmv_analysis presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsrmv_analysis(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64, // the only parameter whose type differs from rocsparse_scsrmv_analysis
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" { // single-precision complex entry point of the csrmv analysis step; it has no doc attribute of its own - the text on rocsparse_scsrmv_analysis presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_ccsrmv_analysis(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex, // the only parameter whose type differs from rocsparse_scsrmv_analysis
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" { // double-precision complex entry point of the csrmv analysis step; it has no doc attribute of its own - the text on rocsparse_scsrmv_analysis presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_zcsrmv_analysis(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex, // the only parameter whose type differs from rocsparse_scsrmv_analysis
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" { // binding for releasing csrmv analysis data; a single entry point with no s/d/c/z prefix and no value-typed parameters
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrmv_clear deallocates all memory that was allocated by\n  rocsparse_scsrmv_analysis(), rocsparse_dcsrmv_analysis(), rocsparse_ccsrmv_analysis()\n  or rocsparse_zcsrmv_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required anymore for further computation, e.g. when\n  switching to another sparse matrix format.\n\n  \\note\n  Calling \\p rocsparse_csrmv_clear is optional. All allocated resources will be\n  cleared, when the opaque \\ref rocsparse_mat_info struct is destroyed using\n  rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer for the gathered information\n              could not be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_csrmv_clear(
+        handle: rocsparse_handle,
+        info: rocsparse_mat_info, // per the doc text above, the info object itself stays alive until rocsparse_destroy_mat_info()
+    ) -> rocsparse_status;
+}
+extern "C" { // f32 entry point of the CSR sparse matrix-vector product; the doc attribute below describes the whole s/d/c/z family
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrmv multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix, defined in CSR storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  The \\p info parameter is optional and contains information collected by\n  rocsparse_scsrmv_analysis(), rocsparse_dcsrmv_analysis(), rocsparse_ccsrmv_analysis()\n  or rocsparse_zcsrmv_analysis(). If present, the information will be used to speed up\n  the \\p csrmv computation. 
If \\p info == \\p NULL, general \\p csrmv routine will be\n  used instead.\n\n  \\code{.c}\n      for(i = 0; i < m; ++i)\n      {\n          y[i] = beta * y[i];\n\n          for(j = csr_row_ptr[i]; j < csr_row_ptr[i + 1]; ++j)\n          {\n              y[i] = y[i] + alpha * csr_val[j] * x[csr_col_ind[j]];\n          }\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  info        information collected by rocsparse_scsrmv_analysis(),\n              rocsparse_dcsrmv_analysis(), rocsparse_ccsrmv_analysis() or\n              rocsparse_dcsrmv_analysis(), can be \\p NULL if no information is\n              available.\n  @param[in]\n  x           array of \\p n elements (\\f$op(A) == A\\f$) or \\p m elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p m elements (\\f$op(A) == A\\f$) or \\p n elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p x, \\p beta or \\p y pointer is\n              invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example performs a sparse matrix vector multiplication in CSR format\n  using additional meta data to improve performance.\n  \\code{.c}\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Perform 
analysis step to obtain meta data\n      rocsparse_scsrmv_analysis(handle,\n                                rocsparse_operation_none,\n                                m,\n                                n,\n                                nnz,\n                                descr,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info);\n\n      // Compute y = Ax\n      rocsparse_scsrmv(handle,\n                       rocsparse_operation_none,\n                       m,\n                       n,\n                       nnz,\n                       &alpha,\n                       descr,\n                       csr_val,\n                       csr_row_ptr,\n                       csr_col_ind,\n                       info,\n                       x,\n                       &beta,\n                       y);\n\n      // Do more work\n      // ...\n\n      // Clean up\n      rocsparse_destroy_mat_info(info);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsrmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info, // NOTE(review): the doc text above names rocsparse_dcsrmv_analysis() twice for this parameter; the second mention is presumably meant to be rocsparse_zcsrmv_analysis()
+        x: *const f32,
+        beta: *const f32,
+        y: *mut f32, // the only mutable pointer: documented above as inout and overwritten with the result
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 entry point of the CSR sparse matrix-vector product; it has no doc attribute of its own - the text on rocsparse_scsrmv presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsrmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f64, // alpha, csr_val, x, beta and y are the parameters whose element type differs from rocsparse_scsrmv
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const f64,
+        beta: *const f64,
+        y: *mut f64,
+    ) -> rocsparse_status;
+}
+extern "C" { // single-precision complex entry point of the CSR sparse matrix-vector product; it has no doc attribute of its own - the text on rocsparse_scsrmv presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_ccsrmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const rocsparse_float_complex, // alpha, csr_val, x, beta and y are the parameters whose element type differs from rocsparse_scsrmv
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const rocsparse_float_complex,
+        beta: *const rocsparse_float_complex,
+        y: *mut rocsparse_float_complex,
+    ) -> rocsparse_status;
+}
+extern "C" { // double-precision complex entry point of the CSR sparse matrix-vector product; it has no doc attribute of its own - the text on rocsparse_scsrmv presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_zcsrmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const rocsparse_double_complex, // alpha, csr_val, x, beta and y are the parameters whose element type differs from rocsparse_scsrmv
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const rocsparse_double_complex,
+        beta: *const rocsparse_double_complex,
+        y: *mut rocsparse_double_complex,
+    ) -> rocsparse_status;
+}
+extern "C" { // binding for the csrsv zero-pivot query; a single entry point with no s/d/c/z prefix and no value-typed parameters
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped; here the status also carries the zero-pivot result described below
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsv_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_scsrsv_solve(),\n  rocsparse_dcsrsv_solve(), rocsparse_ccsrsv_solve() or rocsparse_zcsrsv_solve()\n  computation. The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position,\n  using same index base as the CSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note \\p rocsparse_csrsv_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
+    pub fn rocsparse_csrsv_zero_pivot(
+        handle: rocsparse_handle,
+        descr: rocsparse_mat_descr,
+        info: rocsparse_mat_info,
+        position: *mut rocsparse_int, // per the doc text above: may point to host or device memory, and receives -1 when no zero pivot exists
+    ) -> rocsparse_status;
+}
+extern "C" { // f32 entry point of the csrsv temporary-buffer size query; the doc attribute below describes the whole s/d/c/z family
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsv_buffer_size returns the size of the temporary storage buffer that\n  is required by rocsparse_scsrsv_analysis(), rocsparse_dcsrsv_analysis(),\n  rocsparse_ccsrsv_analysis(), rocsparse_zcsrsv_analysis(), rocsparse_scsrsv_solve(),\n  rocsparse_dcsrsv_solve(), rocsparse_ccsrsv_solve() and rocsparse_zcsrsv_solve(). The\n  temporary storage buffer must be allocated by the user. The size of the temporary\n  storage buffer is identical to the size returned by rocsparse_scsrilu0_buffer_size(),\n  rocsparse_dcsrilu0_buffer_size(), rocsparse_ccsrilu0_buffer_size() and\n  rocsparse_zcsrilu0_buffer_size() if the matrix sparsity pattern is identical. The\n  user allocated buffer can thus be shared between subsequent calls to those functions.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n            
  rocsparse_scsrsv_analysis(), rocsparse_dcsrsv_analysis(),\n              rocsparse_ccsrsv_analysis(), rocsparse_zcsrsv_analysis(),\n              rocsparse_scsrsv_solve(), rocsparse_dcsrsv_solve(),\n              rocsparse_ccsrsv_solve() and rocsparse_zcsrsv_solve().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p info or \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_scsrsv_buffer_size(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize, // output: byte count per the doc text above
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 entry point of the csrsv temporary-buffer size query; it has no doc attribute of its own - the text on rocsparse_scsrsv_buffer_size presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsrsv_buffer_size(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64, // the only parameter whose type differs from rocsparse_scsrsv_buffer_size
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // single-precision complex entry point of the csrsv temporary-buffer size query; it has no doc attribute of its own - the text on rocsparse_scsrsv_buffer_size presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_ccsrsv_buffer_size(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex, // the only parameter whose type differs from rocsparse_scsrsv_buffer_size
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // double-precision complex entry point of the csrsv temporary-buffer size query; it has no doc attribute of its own - the text on rocsparse_scsrsv_buffer_size presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_zcsrsv_buffer_size(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex, // the only parameter whose type differs from rocsparse_scsrsv_buffer_size
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // f32 entry point of the csrsv analysis step; the doc attribute below describes the whole s/d/c/z family
+    #[must_use] // NOTE(review): the doc text below begins and ends with what look like leftover Doxygen group markers from the C header - confirm before cleaning up; unused_must_use lint fires if the returned rocsparse_status is dropped
+    #[doc = "@}*/\n/*! \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsv_analysis performs the analysis step for rocsparse_scsrsv_solve(),\n  rocsparse_dcsrsv_solve(), rocsparse_ccsrsv_solve() and rocsparse_zcsrsv_solve(). It\n  is expected that this function will be executed only once for a given matrix and\n  particular operation type. The analysis meta data can be cleared by\n  rocsparse_csrsv_clear().\n\n  \\p rocsparse_csrsv_analysis can share its meta data with\n  rocsparse_scsrsm_analysis(), rocsparse_dcsrsm_analysis(),\n  rocsparse_ccsrsm_analysis(), rocsparse_zcsrsm_analysis(),\n  rocsparse_scsrilu0_analysis(), rocsparse_dcsrilu0_analysis(),\n  rocsparse_ccsrilu0_analysis(), rocsparse_zcsrilu0_analysis(),\n  rocsparse_scsric0_analysis(), rocsparse_dcsric0_analysis(),\n  rocsparse_ccsric0_analysis() and rocsparse_zcsric0_analysis(). Selecting\n  \\ref rocsparse_analysis_policy_reuse policy can greatly improve computation\n  performance of meta data. However, the user need to make sure that the sparsity\n  pattern remains unchanged. 
If this cannot be assured,\n  \\ref rocsparse_analysis_policy_force has to be used.\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during\n              the analysis step.\n  @param[in]\n  analysis    \\ref rocsparse_analysis_policy_reuse or\n              \\ref rocsparse_analysis_policy_force.\n  @param[in]\n  solve       \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p info or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref 
rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_scsrsv_analysis(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated per the doc text above; the required size is reported by the csrsv buffer_size bindings
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 entry point of the csrsv analysis step; it has no doc attribute of its own - the text on rocsparse_scsrsv_analysis presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_dcsrsv_analysis(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64, // the only parameter whose type differs from rocsparse_scsrsv_analysis
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // single-precision complex entry point of the csrsv analysis step; it has no doc attribute of its own - the text on rocsparse_scsrsv_analysis presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_ccsrsv_analysis(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex, // the only parameter whose type differs from rocsparse_scsrsv_analysis
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // double-precision complex entry point of the csrsv analysis step; it has no doc attribute of its own - the text on rocsparse_scsrsv_analysis presumably applies (confirm against the rocSPARSE header)
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    pub fn rocsparse_zcsrsv_analysis(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex, // the only parameter whose type differs from rocsparse_scsrsv_analysis
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // binding for releasing csrsv analysis data; a single entry point with no s/d/c/z prefix and no value-typed parameters
+    #[must_use] // unused_must_use lint fires if the returned rocsparse_status is dropped
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsv_clear deallocates all memory that was allocated by\n  rocsparse_scsrsv_analysis(), rocsparse_dcsrsv_analysis(), rocsparse_ccsrsv_analysis()\n  or rocsparse_zcsrsv_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required for further computation, e.g. when switching to\n  another sparse matrix format. Calling \\p rocsparse_csrsv_clear is optional. All\n  allocated resources will be cleared, when the opaque \\ref rocsparse_mat_info struct\n  is destroyed using rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[inout]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer holding the meta data could not\n              be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_csrsv_clear(
+        handle: rocsparse_handle,
+        descr: rocsparse_mat_descr, // unlike rocsparse_csrmv_clear, this clear call also takes the matrix descriptor
+        info: rocsparse_mat_info,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse triangular solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsv_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR storage format, a dense solution vector\n  \\f$y\\f$ and the right-hand side \\f$x\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot y = \\alpha \\cdot x,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_csrsv_solve requires a user allocated temporary buffer. Its size is\n  returned by rocsparse_scsrsv_buffer_size(), rocsparse_dcsrsv_buffer_size(),\n  rocsparse_ccsrsv_buffer_size() or rocsparse_zcsrsv_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_scsrsv_analysis(),\n  rocsparse_dcsrsv_analysis(), rocsparse_ccsrsv_analysis() or\n  rocsparse_zcsrsv_analysis(). \\p rocsparse_csrsv_solve reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be checked calling\n  rocsparse_csrsv_zero_pivot(). If\n  \\ref rocsparse_diag_type == \\ref rocsparse_diag_type_unit, no zero pivot will be\n  reported, even if \\f$A_{j,j} = 0\\f$ for some \\f$j\\f$.\n\n  \\note\n  The sparse CSR matrix has to be sorted. 
This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none and\n  \\p trans == \\ref rocsparse_operation_transpose are supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  x           array of \\p m elements, holding the right-hand side.\n  @param[out]\n  y           array of \\p m elements, holding the solution.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p x or \\p y pointer is invalid.\n  \\retval 
    rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the lower triangular \\f$m \\times m\\f$ matrix \\f$L\\f$, stored in CSR\n  storage format with unit diagonal. The following example solves \\f$L \\cdot y = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor\n      rocsparse_mat_descr descr;\n      rocsparse_create_mat_descr(&descr);\n      rocsparse_set_mat_fill_mode(descr, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr, rocsparse_diag_type_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size;\n      rocsparse_dcsrsv_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nnz,\n                                   descr,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n                                   &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis step\n      rocsparse_dcsrsv_analysis(handle,\n                                rocsparse_operation_none,\n                                m,\n                                nnz,\n                                descr,\n                                csr_val,\n                                
csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Solve Ly = x\n      rocsparse_dcsrsv_solve(handle,\n                             rocsparse_operation_none,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             x,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // No zero pivot should be found, with L having unit diagonal\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsrsv_solve(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const f32,
+        y: *mut f32,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcsrsv_solve(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f64,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const f64,
+        y: *mut f64,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccsrsv_solve(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const rocsparse_float_complex,
+        y: *mut rocsparse_float_complex,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcsrsv_solve(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        info: rocsparse_mat_info,
+        x: *const rocsparse_double_complex,
+        y: *mut rocsparse_double_complex,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using ELL storage format\n\n  \\details\n  \\p rocsparse_ellmv multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix, defined in ELL storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < m; ++i)\n      {\n          y[i] = beta * y[i];\n\n          for(p = 0; p < ell_width; ++p)\n          {\n              idx = p * m + i;\n\n              if((ell_col_ind[idx] >= 0) && (ell_col_ind[idx] < n))\n              {\n                  y[i] = y[i] + alpha * ell_val[idx] * x[ell_col_ind[idx]];\n              }\n          }\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the sparse ELL matrix.\n  @param[in]\n  n           number of columns of the sparse ELL matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse ELL matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  ell_val     array that contains the elements of the sparse ELL matrix. 
Padded\n              elements should be zero.\n  @param[in]\n  ell_col_ind array that contains the column indices of the sparse ELL matrix.\n              Padded column indices should be -1.\n  @param[in]\n  ell_width   number of non-zero elements per row of the sparse ELL matrix.\n  @param[in]\n  x           array of \\p n elements (\\f$op(A) == A\\f$) or \\p m elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p m elements (\\f$op(A) == A\\f$) or \\p n elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ell_width is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p ell_val,\n              \\p ell_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_sellmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        ell_val: *const f32,
+        ell_col_ind: *const rocsparse_int,
+        ell_width: rocsparse_int,
+        x: *const f32,
+        beta: *const f32,
+        y: *mut f32,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dellmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        alpha: *const f64,
+        descr: rocsparse_mat_descr,
+        ell_val: *const f64,
+        ell_col_ind: *const rocsparse_int,
+        ell_width: rocsparse_int,
+        x: *const f64,
+        beta: *const f64,
+        y: *mut f64,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cellmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        descr: rocsparse_mat_descr,
+        ell_val: *const rocsparse_float_complex,
+        ell_col_ind: *const rocsparse_int,
+        ell_width: rocsparse_int,
+        x: *const rocsparse_float_complex,
+        beta: *const rocsparse_float_complex,
+        y: *mut rocsparse_float_complex,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zellmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        descr: rocsparse_mat_descr,
+        ell_val: *const rocsparse_double_complex,
+        ell_col_ind: *const rocsparse_int,
+        ell_width: rocsparse_int,
+        x: *const rocsparse_double_complex,
+        beta: *const rocsparse_double_complex,
+        y: *mut rocsparse_double_complex,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using GEBSR storage format\n\n  \\details\n  \\p rocsparse_gebsrmv multiplies the scalar \\f$\\alpha\\f$ with a sparse\n  \\f$(mb \\cdot \\text{row_block_dim}) \\times (nb \\cdot \\text{col_block_dim})\\f$\n  matrix, defined in GEBSR storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none is supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of GEBSR blocks.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  mb          number of block rows of the sparse GEBSR matrix.\n  @param[in]\n  nb          number of block columns of the sparse GEBSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse GEBSR matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse GEBSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse GEBSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse GEBSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              GEBSR matrix.\n  @param[in]\n  row_block_dim row block dimension of the sparse GEBSR matrix.\n  @param[in]\n  col_block_dim column block dimension of the sparse GEBSR matrix.\n  @param[in]\n  x           array of \\p nb*col_block_dim elements (\\f$op(A) = A\\f$) or \\p mb*row_block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p mb*row_block_dim elements (\\f$op(A) = A\\f$) or \\p nb*col_block_dim\n              elements (\\f$op(A) = A^T\\f$ or \\f$op(A) = A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb, \\p row_block_dim\n              or \\p col_block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p x, \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_sgebsrmv(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        x: *const f32,
+        beta: *const f32,
+        y: *mut f32,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dgebsrmv(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f64,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        x: *const f64,
+        beta: *const f64,
+        y: *mut f64,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cgebsrmv(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_float_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        x: *const rocsparse_float_complex,
+        beta: *const rocsparse_float_complex,
+        y: *mut rocsparse_float_complex,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zgebsrmv(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans: rocsparse_operation,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_double_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        x: *const rocsparse_double_complex,
+        beta: *const rocsparse_double_complex,
+        y: *mut rocsparse_double_complex,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Dense matrix sparse vector multiplication\n\n  \\details\n  \\p rocsparse_gemvi_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_sgemvi(), rocsparse_dgemvi(), rocsparse_cgemvi() or\n  rocsparse_zgemvi(). The temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the dense matrix.\n  @param[in]\n  n           number of columns of the dense matrix.\n  @param[in]\n  nnz         number of non-zero entries in the sparse vector.\n  @param[out]\n  buffer_size temporary storage buffer size.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n, or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_sgemvi_buffer_size(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dgemvi_buffer_size(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cgemvi_buffer_size(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zgemvi_buffer_size(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Dense matrix sparse vector multiplication\n\n  \\details\n  \\p rocsparse_gemvi multiplies the scalar \\f$\\alpha\\f$ with a dense \\f$m \\times n\\f$\n  matrix \\f$A\\f$ and the sparse vector \\f$x\\f$ and adds the result to the dense vector\n  \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_gemvi requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_sgemvi_buffer_size(), rocsparse_dgemvi_buffer_size(),\n  rocsparse_cgemvi_buffer_size() or rocsparse_zgemvi_buffer_size().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none is supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  m           number of rows of the dense matrix.\n  @param[in]\n  n           number of columns of the dense matrix.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  A           pointer to the dense matrix.\n  @param[in]\n  lda         leading dimension of the dense matrix\n  @param[in]\n  nnz         number of non-zero entries in the sparse vector\n  @param[in]\n  x_val       array of \\p nnz elements containing the values of the sparse vector\n  @param[in]\n  x_ind       array of \\p nnz elements containing the indices 
of the sparse vector\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p m elements (\\f$op(A) == A\\f$) or \\p n elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n  @param[in]\n  idx_base    rocsparse_index_base_zero or rocsparse_index_base_one.\n  @param[in]\n  temp_buffer temporary storage buffer\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n, \\p lda or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p A, \\p x_val, \\p x_ind,\n              \\p beta, \\p y or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_sgemvi(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocsparse_int,
+        nnz: rocsparse_int,
+        x_val: *const f32,
+        x_ind: *const rocsparse_int,
+        beta: *const f32,
+        y: *mut f32,
+        idx_base: rocsparse_index_base,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dgemvi(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocsparse_int,
+        nnz: rocsparse_int,
+        x_val: *const f64,
+        x_ind: *const rocsparse_int,
+        beta: *const f64,
+        y: *mut f64,
+        idx_base: rocsparse_index_base,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_cgemvi(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        A: *const rocsparse_float_complex,
+        lda: rocsparse_int,
+        nnz: rocsparse_int,
+        x_val: *const rocsparse_float_complex,
+        x_ind: *const rocsparse_int,
+        beta: *const rocsparse_float_complex,
+        y: *mut rocsparse_float_complex,
+        idx_base: rocsparse_index_base,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zgemvi(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        A: *const rocsparse_double_complex,
+        lda: rocsparse_int,
+        nnz: rocsparse_int,
+        x_val: *const rocsparse_double_complex,
+        x_ind: *const rocsparse_int,
+        beta: *const rocsparse_double_complex,
+        y: *mut rocsparse_double_complex,
+        idx_base: rocsparse_index_base,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level2_module\n  \\brief Sparse matrix vector multiplication using HYB storage format\n\n  \\details\n  \\p rocsparse_hybmv multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix, defined in HYB storage format, and the dense vector \\f$x\\f$ and adds the\n  result to the dense vector \\f$y\\f$ that is multiplied by the scalar \\f$\\beta\\f$,\n  such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans       matrix operation type.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse HYB matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  hyb         matrix in HYB storage format.\n  @param[in]\n  x           array of \\p n elements (\\f$op(A) == A\\f$) or \\p m elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           array of \\p m elements (\\f$op(A) == A\\f$) or \\p n elements\n              (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p hyb structure was not initialized with\n              valid matrix sizes.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p hyb, \\p x,\n              \\p beta or \\p y pointer is invalid.\n  \\retval     rocsparse_status_invalid_value \\p hyb structure was not initialized\n              with a valid partitioning type.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_memory_error the buffer could not be allocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_shybmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        hyb: rocsparse_hyb_mat,
+        x: *const f32,
+        beta: *const f32,
+        y: *mut f32,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dhybmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        alpha: *const f64,
+        descr: rocsparse_mat_descr,
+        hyb: rocsparse_hyb_mat,
+        x: *const f64,
+        beta: *const f64,
+        y: *mut f64,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_chybmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        alpha: *const rocsparse_float_complex,
+        descr: rocsparse_mat_descr,
+        hyb: rocsparse_hyb_mat,
+        x: *const rocsparse_float_complex,
+        beta: *const rocsparse_float_complex,
+        y: *mut rocsparse_float_complex,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zhybmv(
+        handle: rocsparse_handle,
+        trans: rocsparse_operation,
+        alpha: *const rocsparse_double_complex,
+        descr: rocsparse_mat_descr,
+        hyb: rocsparse_hyb_mat,
+        x: *const rocsparse_double_complex,
+        beta: *const rocsparse_double_complex,
+        y: *mut rocsparse_double_complex,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse matrix dense matrix multiplication using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrmm multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$mb \\times kb\\f$\n  matrix \\f$A\\f$, defined in BSR storage format, and the dense \\f$k \\times n\\f$\n  matrix \\f$B\\f$ (where \\f$k = block\\_dim \\times kb\\f$) and adds the result to the dense\n  \\f$m \\times n\\f$ matrix \\f$C\\f$ (where \\f$m = block\\_dim \\times mb\\f$) that\n  is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot C,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans_A == \\ref rocsparse_operation_none is supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         the storage format of the blocks. Can be \\ref rocsparse_direction_row or \\ref rocsparse_direction_column.\n  @param[in]\n  trans_A     matrix \\f$A\\f$ operation type. Currently, only \\ref rocsparse_operation_none is supported.\n  @param[in]\n  trans_B     matrix \\f$B\\f$ operation type. 
Currently, only \\ref rocsparse_operation_none and rocsparse_operation_transpose\n              are supported.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  n           number of columns of the dense matrix \\f$op(B)\\f$ and \\f$C\\f$.\n  @param[in]\n  kb          number of block columns of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix \\f$A\\f$. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb*block_dim*block_dim elements of the sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix \\f$A\\f$.\n  @param[in]\n  block_dim   size of the blocks in the sparse BSR matrix.\n  @param[in]\n  B           array of dimension \\f$ldb \\times n\\f$ (\\f$op(B) == B\\f$),\n              \\f$ldb \\times k\\f$ otherwise.\n  @param[in]\n  ldb         leading dimension of \\f$B\\f$, must be at least \\f$\\max{(1, k)}\\f$ (\\f$ op(B) == B\\f$) where \\f$k = block\\_dim \\times kb\\f$,\n  \\f$\\max{(1, n)}\\f$ otherwise.\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  C           array of dimension \\f$ldc \\times n\\f$.\n  @param[in]\n  ldc         leading dimension of \\f$C\\f$, must be at least \\f$\\max{(1, m)}\\f$ (\\f$ op(A) == A\\f$) where \\f$m = block\\_dim \\times mb\\f$,\n  \\f$\\max{(1, k)}\\f$ where \\f$k = block\\_dim \\times kb\\f$ otherwise.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle 
the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p n, \\p kb, \\p nnzb, \\p ldb or \\p ldc\n              is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p B, \\p beta or \\p C pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A != \\ref rocsparse_operation_none or\n              \\p trans_B == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example multiplies a BSR matrix with a dense matrix.\n  \\code{.c}\n      //     1 2 0 3 0 0\n      // A = 0 4 5 0 0 0\n      //     0 0 0 7 8 0\n      //     0 0 1 2 4 1\n\n      rocsparse_int block_dim = 2;\n      rocsparse_int mb   = 2;\n      rocsparse_int kb   = 3;\n      rocsparse_int nnzb = 4;\n      rocsparse_direction dir = rocsparse_direction_row;\n\n      bsr_row_ptr[mb+1]                 = {0, 2, 4};                                        // device memory\n      bsr_col_ind[nnzb]                 = {0, 1, 1, 2};                                     // device memory\n      bsr_val[nnzb*block_dim*block_dim] = {1, 2, 0, 4, 0, 3, 5, 0, 0, 7, 1, 2, 8, 0, 4, 1}; // device memory\n\n      // Set dimension n of B\n      rocsparse_int n = 64;\n      rocsparse_int m = mb * block_dim;\n      rocsparse_int k = kb * block_dim;\n\n      // Allocate and generate dense matrix B\n      std::vector<float> hB(k * n);\n      for(rocsparse_int i = 0; i < k * n; ++i)\n      {\n          hB[i] = static_cast<float>(rand()) / RAND_MAX;\n      }\n\n      // Copy B to the device\n      float* B;\n      hipMalloc((void**)&B, sizeof(float) * k * n);\n      hipMemcpy(B, hB.data(), sizeof(float) * k * n, hipMemcpyHostToDevice);\n\n      // alpha and beta\n      float alpha = 
1.0f;\n      float beta  = 0.0f;\n\n      // Allocate memory for the resulting matrix C\n      float* C;\n      hipMalloc((void**)&C, sizeof(float) * m * n);\n\n      // Perform the matrix multiplication\n      rocsparse_sbsrmm(handle,\n                       dir,\n                       rocsparse_operation_none,\n                       rocsparse_operation_none,\n                       mb,\n                       n,\n                       kb,\n                       nnzb,\n                       &alpha,\n                       descr,\n                       bsr_val,\n                       bsr_row_ptr,\n                       bsr_col_ind,\n                       block_dim,\n                       B,\n                       k,\n                       &beta,\n                       C,\n                       m);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_sbsrmm( // f32 variant; the d/c/z declarations that follow repeat this parameter list with other element types
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        mb: rocsparse_int,
+        n: rocsparse_int,
+        kb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        B: *const f32,
+        ldb: rocsparse_int,
+        beta: *const f32,
+        C: *mut f32, // the only mutable pointer; tagged @param[inout] in the #[doc] attribute above
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 variant of rocsparse_sbsrmm; the parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_dbsrmm(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        mb: rocsparse_int,
+        n: rocsparse_int,
+        kb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f64,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        B: *const f64,
+        ldb: rocsparse_int,
+        beta: *const f64,
+        C: *mut f64, // the only mutable pointer in this signature
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_float_complex variant of rocsparse_sbsrmm; the parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_cbsrmm(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        mb: rocsparse_int,
+        n: rocsparse_int,
+        kb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_float_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        B: *const rocsparse_float_complex,
+        ldb: rocsparse_int,
+        beta: *const rocsparse_float_complex,
+        C: *mut rocsparse_float_complex, // the only mutable pointer in this signature
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_double_complex variant of rocsparse_sbsrmm; the parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_zbsrmm(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        mb: rocsparse_int,
+        n: rocsparse_int,
+        kb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_double_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        B: *const rocsparse_double_complex,
+        ldb: rocsparse_int,
+        beta: *const rocsparse_double_complex,
+        C: *mut rocsparse_double_complex, // the only mutable pointer in this signature
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsm_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_sbsrsm_solve(),\n  rocsparse_dbsrsm_solve(), rocsparse_cbsrsm_solve() or rocsparse_zbsrsm_solve()\n  computation. The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position,\n  using same index base as the BSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note \\p rocsparse_bsrsm_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
+    pub fn rocsparse_bsrsm_zero_pivot( // takes no matrix value arrays, only the handle, the analysis info and the output slot
+        handle: rocsparse_handle,
+        info: rocsparse_mat_info,
+        position: *mut rocsparse_int, // per the #[doc] above: receives the zero-pivot index, or -1 when none was found
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsm_buffer_size returns the size of the temporary storage buffer that\n  is required by rocsparse_sbsrsm_analysis(), rocsparse_dbsrsm_analysis(),\n  rocsparse_cbsrsm_analysis(), rocsparse_zbsrsm_analysis(), rocsparse_sbsrsm_solve(),\n  rocsparse_dbsrsm_solve(), rocsparse_cbsrsm_solve() and rocsparse_zbsrsm_solve(). The\n  temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_X     matrix X operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(X).\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix A.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix A.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim   block dimension of the sparse BSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_sbsrsm_analysis(), rocsparse_dbsrsm_analysis(),\n              
rocsparse_cbsrsm_analysis(), rocsparse_zbsrsm_analysis(),\n              rocsparse_sbsrsm_solve(), rocsparse_dbsrsm_solve(),\n              rocsparse_cbsrsm_solve() and rocsparse_zbsrsm_solve().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nrhs, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p info or \\p buffer_size pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_X == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_sbsrsm_buffer_size( // f32 variant; the d/c/z declarations that follow differ only in the element type of bsr_val
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize, // out-parameter; the #[doc] above describes it as a size in bytes
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 variant of rocsparse_sbsrsm_buffer_size; the parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_dbsrsm_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f64, // the only parameter whose type differs from the f32 variant
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize, // the only mutable pointer in this signature
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_float_complex variant of rocsparse_sbsrsm_buffer_size; parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_cbsrsm_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_float_complex, // the only parameter whose type differs from the f32 variant
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize, // the only mutable pointer in this signature
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_double_complex variant of rocsparse_sbsrsm_buffer_size; parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_zbsrsm_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_double_complex, // the only parameter whose type differs from the f32 variant
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        buffer_size: *mut usize, // the only mutable pointer in this signature
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsm_analysis performs the analysis step for rocsparse_sbsrsm_solve(),\n  rocsparse_dbsrsm_solve(), rocsparse_cbsrsm_solve() and rocsparse_zbsrsm_solve(). It\n  is expected that this function will be executed only once for a given matrix and\n  particular operation type. The analysis meta data can be cleared by\n  rocsparse_bsrsm_clear().\n\n  \\p rocsparse_bsrsm_analysis can share its meta data with\n  rocsparse_sbsrilu0_analysis(), rocsparse_dbsrilu0_analysis(),\n  rocsparse_cbsrilu0_analysis(), rocsparse_zbsrilu0_analysis(),\n  rocsparse_sbsric0_analysis(), rocsparse_dbsric0_analysis(),\n  rocsparse_cbsric0_analysis(), rocsparse_zbsric0_analysis(),\n  rocsparse_sbsrsv_analysis(), rocsparse_dbsrsv_analysis(),\n  rocsparse_cbsrsv_analysis() and rocsparse_zbsrsv_analysis(). Selecting\n  \\ref rocsparse_analysis_policy_reuse policy can greatly improve computation\n  performance of meta data. However, the user need to make sure that the sparsity\n  pattern remains unchanged. 
If this cannot be assured,\n  \\ref rocsparse_analysis_policy_force has to be used.\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_X     matrix X operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(X).\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix A.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix A.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix A.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix A.\n  @param[in]\n  bsr_col_ind array of \\p nnzb containing the block column indices of the sparse\n              BSR matrix A.\n  @param[in]\n  block_dim   block dimension of the sparse BSR matrix A.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  analysis    \\ref rocsparse_analysis_policy_reuse or\n              \\ref rocsparse_analysis_policy_force.\n  @param[in]\n  solve       \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nrhs, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p 
descr, \\p bsr_val, \\p bsr_row_ptr,\n              \\p bsr_col_ind, \\p info or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_X == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_sbsrsm_analysis( // f32 variant; the d/c/z declarations that follow differ only in the element type of bsr_val
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info, // tagged @param[out] in the #[doc] above even though it is passed by value here
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void, // untyped; the #[doc] above describes it as user-allocated temporary storage
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 variant of rocsparse_sbsrsm_analysis; the parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_dbsrsm_analysis(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f64, // the only parameter whose type differs from the f32 variant
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void, // untyped buffer pointer; the only raw mutable pointer in this signature
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_float_complex variant of rocsparse_sbsrsm_analysis; parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_cbsrsm_analysis(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_float_complex, // the only parameter whose type differs from the f32 variant
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void, // untyped buffer pointer; the only raw mutable pointer in this signature
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_double_complex variant of rocsparse_sbsrsm_analysis; parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_zbsrsm_analysis(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_double_complex, // the only parameter whose type differs from the f32 variant
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void, // untyped buffer pointer; the only raw mutable pointer in this signature
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsm_clear deallocates all memory that was allocated by\n  rocsparse_sbsrsm_analysis(), rocsparse_dbsrsm_analysis(), rocsparse_cbsrsm_analysis()\n  or rocsparse_zbsrsm_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required for further computation, e.g. when switching to\n  another sparse matrix format. Calling \\p rocsparse_bsrsm_clear is optional. All\n  allocated resources will be cleared, when the opaque \\ref rocsparse_mat_info struct\n  is destroyed using rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer holding the meta data could not\n              be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_bsrsm_clear( // takes no matrix data, only the library handle and the analysis info
+        handle: rocsparse_handle,
+        info: rocsparse_mat_info, // tagged @param[inout] in the #[doc] attribute above
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using BSR storage format\n\n  \\details\n  \\p rocsparse_bsrsm_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in BSR storage format, a dense solution matrix\n  \\f$X\\f$ and the right-hand side matrix \\f$B\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot op(X) = \\alpha \\cdot op(B),\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  ,\n  \\f[\n    op(X) = \\left\\{\n    \\begin{array}{ll}\n        X,   & \\text{if trans_X == rocsparse_operation_none} \\\\\n        X^T, & \\text{if trans_X == rocsparse_operation_transpose} \\\\\n        X^H, & \\text{if trans_X == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_bsrsm_solve requires a user allocated temporary buffer. Its size is\n  returned by rocsparse_sbsrsm_buffer_size(), rocsparse_dbsrsm_buffer_size(),\n  rocsparse_cbsrsm_buffer_size() or rocsparse_zbsrsm_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_sbsrsm_analysis(),\n  rocsparse_dbsrsm_analysis(), rocsparse_cbsrsm_analysis() or\n  rocsparse_zbsrsm_analysis(). \\p rocsparse_bsrsm_solve reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be checked calling\n  rocsparse_bsrsm_zero_pivot(). 
If\n  \\ref rocsparse_diag_type == \\ref rocsparse_diag_type_unit, no zero pivot will be\n  reported, even if \\f$A_{j,j} = 0\\f$ for some \\f$j\\f$.\n\n  \\note\n  The sparse BSR matrix has to be sorted.\n\n  \\note\n  Operation type of B and X must match, if \\f$op(B)=B, op(X)=X\\f$.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans_A != \\ref rocsparse_operation_conjugate_transpose and\n  \\p trans_X != \\ref rocsparse_operation_conjugate_transpose is supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         matrix storage of BSR blocks.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_X     matrix X operation type.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(X).\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix A.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix A.\n  @param[in]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb containing the block column indices of the sparse\n              BSR matrix.\n  @param[in]\n  block_dim   block dimension of the sparse BSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  B           rhs matrix B with leading dimension \\p ldb.\n  @param[in]\n  ldb         leading dimension of rhs matrix B.\n  @param[out]\n  X           solution matrix X with 
leading dimension \\p ldx.\n  @param[in]\n  ldx         leading dimension of solution matrix X.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nrhs, \\p nnzb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p descr, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p B, \\p X, \\p info or \\p temp_buffer pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_X == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_sbsrsm_solve( // f32 variant; the d/c/z declarations that follow repeat this parameter list with other element types
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        B: *const f32, // right-hand side; read-only pointer
+        ldb: rocsparse_int,
+        X: *mut f32, // solution matrix; tagged @param[out] in the #[doc] attribute above
+        ldx: rocsparse_int,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void, // untyped; the #[doc] above describes it as user-allocated temporary storage
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 variant of rocsparse_sbsrsm_solve; the parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_dbsrsm_solve(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f64,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        B: *const f64, // read-only pointer
+        ldb: rocsparse_int,
+        X: *mut f64, // the only typed mutable pointer in this signature
+        ldx: rocsparse_int,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void, // untyped buffer pointer
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_float_complex variant of rocsparse_sbsrsm_solve; parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_cbsrsm_solve(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_float_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        B: *const rocsparse_float_complex, // read-only pointer
+        ldb: rocsparse_int,
+        X: *mut rocsparse_float_complex, // the only typed mutable pointer in this signature
+        ldx: rocsparse_int,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void, // untyped buffer pointer
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_double_complex variant of rocsparse_sbsrsm_solve; parameters are documented on that declaration's #[doc]
+    #[must_use]
+    pub fn rocsparse_zbsrsm_solve(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_X: rocsparse_operation,
+        mb: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_double_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        block_dim: rocsparse_int,
+        info: rocsparse_mat_info,
+        B: *const rocsparse_double_complex, // read-only pointer
+        ldb: rocsparse_int,
+        X: *mut rocsparse_double_complex, // the only typed mutable pointer in this signature
+        ldx: rocsparse_int,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void, // untyped buffer pointer
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse matrix dense matrix multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_csrmm multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times k\\f$\n  matrix \\f$A\\f$, defined in CSR storage format, and the dense \\f$k \\times n\\f$\n  matrix \\f$B\\f$ and adds the result to the dense \\f$m \\times n\\f$ matrix \\f$C\\f$ that\n  is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot C,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < ldc; ++i)\n      {\n          for(j = 0; j < n; ++j)\n          {\n              C[i][j] = beta * C[i][j];\n\n              for(k = csr_row_ptr[i]; k < csr_row_ptr[i + 1]; ++k)\n              {\n                  C[i][j] += alpha * csr_val[k] * B[csr_col_ind[k]][j];\n              }\n          }\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans_A     matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B     matrix \\f$B\\f$ operation type.\n  @param[in]\n  m      
     number of rows of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  n           number of columns of the dense matrix \\f$op(B)\\f$ and \\f$C\\f$.\n  @param[in]\n  k           number of columns of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix \\f$A\\f$. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix \\f$A\\f$.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix \\f$A\\f$.\n  @param[in]\n  B           array of dimension \\f$ldb \\times n\\f$ (\\f$op(B) == B\\f$),\n              \\f$ldb \\times k\\f$ otherwise.\n  @param[in]\n  ldb         leading dimension of \\f$B\\f$, must be at least \\f$\\max{(1, k)}\\f$\n              (\\f$op(B) == B\\f$), \\f$\\max{(1, n)}\\f$ otherwise.\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  C           array of dimension \\f$ldc \\times n\\f$.\n  @param[in]\n  ldc         leading dimension of \\f$C\\f$, must be at least \\f$\\max{(1, m)}\\f$\n              (\\f$op(A) == A\\f$), \\f$\\max{(1, k)}\\f$ otherwise.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n, \\p k, \\p nnz, \\p ldb or \\p ldc\n              is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p B, \\p beta or \\p C pointer is invalid.\n  \\retval     
rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example multiplies a CSR matrix with a dense matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m   = 3;\n      rocsparse_int k   = 5;\n      rocsparse_int nnz = 8;\n\n      csr_row_ptr[m+1] = {0, 3, 5, 8};             // device memory\n      csr_col_ind[nnz] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      csr_val[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8}; // device memory\n\n      // Set dimension n of B\n      rocsparse_int n = 64;\n\n      // Allocate and generate dense matrix B\n      std::vector<float> hB(k * n);\n      for(rocsparse_int i = 0; i < k * n; ++i)\n      {\n          hB[i] = static_cast<float>(rand()) / RAND_MAX;\n      }\n\n      // Copy B to the device\n      float* B;\n      hipMalloc((void**)&B, sizeof(float) * k * n);\n      hipMemcpy(B, hB.data(), sizeof(float) * k * n, hipMemcpyHostToDevice);\n\n      // alpha and beta\n      float alpha = 1.0f;\n      float beta  = 0.0f;\n\n      // Allocate memory for the resulting matrix C\n      float* C;\n      hipMalloc((void**)&C, sizeof(float) * m * n);\n\n      // Perform the matrix multiplication\n      rocsparse_scsrmm(handle,\n                       rocsparse_operation_none,\n                       rocsparse_operation_none,\n                       m,\n                       n,\n                       k,\n                       nnz,\n                       &alpha,\n                       descr,\n                       csr_val,\n                       csr_row_ptr,\n                       csr_col_ind,\n                       B,\n                       k,\n                       &beta,\n                       C,\n                       m);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsrmm(
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        k: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        B: *const f32,
+        ldb: rocsparse_int,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcsrmm( // f64 variant: signature mirrors rocsparse_scsrmm; notes below follow that function's #[doc] text (assumed to cover this variant too - confirm upstream)
+        handle: rocsparse_handle, // rocsparse library context
+        trans_A: rocsparse_operation, // selects op(A)
+        trans_B: rocsparse_operation, // selects op(B)
+        m: rocsparse_int, // rows of the sparse CSR matrix A
+        n: rocsparse_int, // columns of op(B) and of C
+        k: rocsparse_int, // columns of the sparse CSR matrix A
+        nnz: rocsparse_int, // non-zero entries of A
+        alpha: *const f64, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of A
+        csr_val: *const f64, // nnz values of A
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets of A
+        csr_col_ind: *const rocsparse_int, // nnz column indices of A
+        B: *const f64, // dense input matrix B (read-only pointer)
+        ldb: rocsparse_int, // leading dimension of B
+        beta: *const f64, // scalar beta
+        C: *mut f64, // dense matrix C, read and written (C := alpha*op(A)*op(B) + beta*C)
+        ldc: rocsparse_int, // leading dimension of C
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccsrmm( // rocsparse_float_complex variant: signature mirrors rocsparse_scsrmm; notes below follow that function's #[doc] text (assumed to cover this variant too - confirm upstream)
+        handle: rocsparse_handle, // rocsparse library context
+        trans_A: rocsparse_operation, // selects op(A)
+        trans_B: rocsparse_operation, // selects op(B)
+        m: rocsparse_int, // rows of the sparse CSR matrix A
+        n: rocsparse_int, // columns of op(B) and of C
+        k: rocsparse_int, // columns of the sparse CSR matrix A
+        nnz: rocsparse_int, // non-zero entries of A
+        alpha: *const rocsparse_float_complex, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of A
+        csr_val: *const rocsparse_float_complex, // nnz values of A
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets of A
+        csr_col_ind: *const rocsparse_int, // nnz column indices of A
+        B: *const rocsparse_float_complex, // dense input matrix B (read-only pointer)
+        ldb: rocsparse_int, // leading dimension of B
+        beta: *const rocsparse_float_complex, // scalar beta
+        C: *mut rocsparse_float_complex, // dense matrix C, read and written (C := alpha*op(A)*op(B) + beta*C)
+        ldc: rocsparse_int, // leading dimension of C
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcsrmm( // rocsparse_double_complex variant: signature mirrors rocsparse_scsrmm; notes below follow that function's #[doc] text (assumed to cover this variant too - confirm upstream)
+        handle: rocsparse_handle, // rocsparse library context
+        trans_A: rocsparse_operation, // selects op(A)
+        trans_B: rocsparse_operation, // selects op(B)
+        m: rocsparse_int, // rows of the sparse CSR matrix A
+        n: rocsparse_int, // columns of op(B) and of C
+        k: rocsparse_int, // columns of the sparse CSR matrix A
+        nnz: rocsparse_int, // non-zero entries of A
+        alpha: *const rocsparse_double_complex, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of A
+        csr_val: *const rocsparse_double_complex, // nnz values of A
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets of A
+        csr_col_ind: *const rocsparse_int, // nnz column indices of A
+        B: *const rocsparse_double_complex, // dense input matrix B (read-only pointer)
+        ldb: rocsparse_int, // leading dimension of B
+        beta: *const rocsparse_double_complex, // scalar beta
+        C: *mut rocsparse_double_complex, // dense matrix C, read and written (C := alpha*op(A)*op(B) + beta*C)
+        ldc: rocsparse_int, // leading dimension of C
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsm_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_scsrsm_solve(),\n  rocsparse_dcsrsm_solve(), rocsparse_ccsrsm_solve() or rocsparse_zcsrsm_solve()\n  computation. The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position,\n  using same index base as the CSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note \\p rocsparse_csrsm_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
+    pub fn rocsparse_csrsm_zero_pivot( // single entry point with no value-typed arguments; documented above as a blocking call
+        handle: rocsparse_handle, // rocsparse library context
+        info: rocsparse_mat_info, // analysis metadata gathered by a csrsm analysis call
+        position: *mut rocsparse_int, // in/out: receives the first zero-pivot index j, or -1 when none was found
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsm_buffer_size returns the size of the temporary storage buffer that\n  is required by rocsparse_scsrsm_analysis(), rocsparse_dcsrsm_analysis(),\n  rocsparse_ccsrsm_analysis(), rocsparse_zcsrsm_analysis(), rocsparse_scsrsm_solve(),\n  rocsparse_dcsrsm_solve(), rocsparse_ccsrsm_solve() and rocsparse_zcsrsm_solve(). The\n  temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_B     matrix B operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(B).\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix A.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix A.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix A.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix A.\n  @param[in]\n  B           array of \\p m \\f$\\times\\f$ \\p nrhs elements of the rhs matrix B.\n  @param[in]\n  ldb         leading dimension of rhs matrix B.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[out]\n  buffer_size number 
of bytes of the temporary storage buffer required by\n              rocsparse_scsrsm_analysis(), rocsparse_dcsrsm_analysis(),\n              rocsparse_ccsrsm_analysis(), rocsparse_zcsrsm_analysis(),\n              rocsparse_scsrsm_solve(), rocsparse_dcsrsm_solve(),\n              rocsparse_ccsrsm_solve() and rocsparse_zcsrsm_solve().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p nrhs or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p descr, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p B, \\p info or \\p buffer_size pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_B == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_scsrsm_buffer_size(
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        m: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        B: *const f32,
+        ldb: rocsparse_int,
+        info: rocsparse_mat_info,
+        policy: rocsparse_solve_policy,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcsrsm_buffer_size( // f64 variant: signature mirrors rocsparse_scsrsm_buffer_size; notes below follow that function's #[doc] text (assumed to cover this variant too - confirm upstream)
+        handle: rocsparse_handle, // rocsparse library context
+        trans_A: rocsparse_operation, // matrix A operation type
+        trans_B: rocsparse_operation, // matrix B operation type
+        m: rocsparse_int, // rows of the sparse CSR matrix A
+        nrhs: rocsparse_int, // columns of the dense matrix op(B)
+        nnz: rocsparse_int, // non-zero entries of A
+        alpha: *const f64, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of A
+        csr_val: *const f64, // nnz values of A
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets of A
+        csr_col_ind: *const rocsparse_int, // nnz column indices of A
+        B: *const f64, // right-hand-side matrix B (read-only pointer here)
+        ldb: rocsparse_int, // leading dimension of B
+        info: rocsparse_mat_info, // analysis metadata structure
+        policy: rocsparse_solve_policy, // documented value: rocsparse_solve_policy_auto
+        buffer_size: *mut usize, // out: bytes of temporary storage the analysis and solve calls need
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccsrsm_buffer_size( // rocsparse_float_complex variant: signature mirrors rocsparse_scsrsm_buffer_size; notes below follow that function's #[doc] text (assumed to cover this variant too - confirm upstream)
+        handle: rocsparse_handle, // rocsparse library context
+        trans_A: rocsparse_operation, // matrix A operation type
+        trans_B: rocsparse_operation, // matrix B operation type
+        m: rocsparse_int, // rows of the sparse CSR matrix A
+        nrhs: rocsparse_int, // columns of the dense matrix op(B)
+        nnz: rocsparse_int, // non-zero entries of A
+        alpha: *const rocsparse_float_complex, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of A
+        csr_val: *const rocsparse_float_complex, // nnz values of A
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets of A
+        csr_col_ind: *const rocsparse_int, // nnz column indices of A
+        B: *const rocsparse_float_complex, // right-hand-side matrix B (read-only pointer here)
+        ldb: rocsparse_int, // leading dimension of B
+        info: rocsparse_mat_info, // analysis metadata structure
+        policy: rocsparse_solve_policy, // documented value: rocsparse_solve_policy_auto
+        buffer_size: *mut usize, // out: bytes of temporary storage the analysis and solve calls need
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcsrsm_buffer_size( // rocsparse_double_complex variant: signature mirrors rocsparse_scsrsm_buffer_size; notes below follow that function's #[doc] text (assumed to cover this variant too - confirm upstream)
+        handle: rocsparse_handle, // rocsparse library context
+        trans_A: rocsparse_operation, // matrix A operation type
+        trans_B: rocsparse_operation, // matrix B operation type
+        m: rocsparse_int, // rows of the sparse CSR matrix A
+        nrhs: rocsparse_int, // columns of the dense matrix op(B)
+        nnz: rocsparse_int, // non-zero entries of A
+        alpha: *const rocsparse_double_complex, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of A
+        csr_val: *const rocsparse_double_complex, // nnz values of A
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets of A
+        csr_col_ind: *const rocsparse_int, // nnz column indices of A
+        B: *const rocsparse_double_complex, // right-hand-side matrix B (read-only pointer here)
+        ldb: rocsparse_int, // leading dimension of B
+        info: rocsparse_mat_info, // analysis metadata structure
+        policy: rocsparse_solve_policy, // documented value: rocsparse_solve_policy_auto
+        buffer_size: *mut usize, // out: bytes of temporary storage the analysis and solve calls need
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsm_analysis performs the analysis step for rocsparse_scsrsm_solve(),\n  rocsparse_dcsrsm_solve(), rocsparse_ccsrsm_solve() and rocsparse_zcsrsm_solve(). It\n  is expected that this function will be executed only once for a given matrix and\n  particular operation type. The analysis meta data can be cleared by\n  rocsparse_csrsm_clear().\n\n  \\p rocsparse_csrsm_analysis can share its meta data with\n  rocsparse_scsrilu0_analysis(), rocsparse_dcsrilu0_analysis(),\n  rocsparse_ccsrilu0_analysis(), rocsparse_zcsrilu0_analysis(),\n  rocsparse_scsric0_analysis(), rocsparse_dcsric0_analysis(),\n  rocsparse_ccsric0_analysis(), rocsparse_zcsric0_analysis(),\n  rocsparse_scsrsv_analysis(), rocsparse_dcsrsv_analysis(),\n  rocsparse_ccsrsv_analysis() and rocsparse_zcsrsv_analysis(). Selecting\n  \\ref rocsparse_analysis_policy_reuse policy can greatly improve computation\n  performance of meta data. However, the user need to make sure that the sparsity\n  pattern remains unchanged. 
If this cannot be assured,\n  \\ref rocsparse_analysis_policy_force has to be used.\n\n  \\note\n  If the matrix sparsity pattern changes, the gathered information will become invalid.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_B     matrix B operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(B).\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix A.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix A.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix A.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix A.\n  @param[in]\n  B           array of \\p m \\f$\\times\\f$ \\p nrhs elements of the rhs matrix B.\n  @param[in]\n  ldb         leading dimension of rhs matrix B.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  analysis    \\ref rocsparse_analysis_policy_reuse or\n              \\ref rocsparse_analysis_policy_force.\n  @param[in]\n  solve       \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p nrhs or \\p nnz is 
invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p descr, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p B, \\p info or \\p temp_buffer pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_B == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
+    pub fn rocsparse_scsrsm_analysis(
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        m: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        B: *const f32,
+        ldb: rocsparse_int,
+        info: rocsparse_mat_info,
+        analysis: rocsparse_analysis_policy,
+        solve: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcsrsm_analysis( // f64 variant: signature mirrors rocsparse_scsrsm_analysis; notes below follow that function's #[doc] text (assumed to cover this variant too - confirm upstream)
+        handle: rocsparse_handle, // rocsparse library context
+        trans_A: rocsparse_operation, // matrix A operation type
+        trans_B: rocsparse_operation, // matrix B operation type
+        m: rocsparse_int, // rows of the sparse CSR matrix A
+        nrhs: rocsparse_int, // columns of the dense matrix op(B)
+        nnz: rocsparse_int, // non-zero entries of A
+        alpha: *const f64, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of A
+        csr_val: *const f64, // nnz values of A
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets of A
+        csr_col_ind: *const rocsparse_int, // nnz column indices of A
+        B: *const f64, // right-hand-side matrix B (read-only pointer here)
+        ldb: rocsparse_int, // leading dimension of B
+        info: rocsparse_mat_info, // out: receives the analysis metadata
+        analysis: rocsparse_analysis_policy, // rocsparse_analysis_policy_reuse or rocsparse_analysis_policy_force
+        solve: rocsparse_solve_policy, // documented value: rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // caller-allocated scratch buffer
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccsrsm_analysis( // rocsparse_float_complex variant: signature mirrors rocsparse_scsrsm_analysis; notes below follow that function's #[doc] text (assumed to cover this variant too - confirm upstream)
+        handle: rocsparse_handle, // rocsparse library context
+        trans_A: rocsparse_operation, // matrix A operation type
+        trans_B: rocsparse_operation, // matrix B operation type
+        m: rocsparse_int, // rows of the sparse CSR matrix A
+        nrhs: rocsparse_int, // columns of the dense matrix op(B)
+        nnz: rocsparse_int, // non-zero entries of A
+        alpha: *const rocsparse_float_complex, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of A
+        csr_val: *const rocsparse_float_complex, // nnz values of A
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets of A
+        csr_col_ind: *const rocsparse_int, // nnz column indices of A
+        B: *const rocsparse_float_complex, // right-hand-side matrix B (read-only pointer here)
+        ldb: rocsparse_int, // leading dimension of B
+        info: rocsparse_mat_info, // out: receives the analysis metadata
+        analysis: rocsparse_analysis_policy, // rocsparse_analysis_policy_reuse or rocsparse_analysis_policy_force
+        solve: rocsparse_solve_policy, // documented value: rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // caller-allocated scratch buffer
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcsrsm_analysis( // rocsparse_double_complex variant: signature mirrors rocsparse_scsrsm_analysis; notes below follow that function's #[doc] text (assumed to cover this variant too - confirm upstream)
+        handle: rocsparse_handle, // rocsparse library context
+        trans_A: rocsparse_operation, // matrix A operation type
+        trans_B: rocsparse_operation, // matrix B operation type
+        m: rocsparse_int, // rows of the sparse CSR matrix A
+        nrhs: rocsparse_int, // columns of the dense matrix op(B)
+        nnz: rocsparse_int, // non-zero entries of A
+        alpha: *const rocsparse_double_complex, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of A
+        csr_val: *const rocsparse_double_complex, // nnz values of A
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets of A
+        csr_col_ind: *const rocsparse_int, // nnz column indices of A
+        B: *const rocsparse_double_complex, // right-hand-side matrix B (read-only pointer here)
+        ldb: rocsparse_int, // leading dimension of B
+        info: rocsparse_mat_info, // out: receives the analysis metadata
+        analysis: rocsparse_analysis_policy, // rocsparse_analysis_policy_reuse or rocsparse_analysis_policy_force
+        solve: rocsparse_solve_policy, // documented value: rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // caller-allocated scratch buffer
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsm_clear deallocates all memory that was allocated by\n  rocsparse_scsrsm_analysis(), rocsparse_dcsrsm_analysis(), rocsparse_ccsrsm_analysis()\n  or rocsparse_zcsrsm_analysis(). This is especially useful, if memory is an issue and\n  the analysis data is not required for further computation, e.g. when switching to\n  another sparse matrix format. Calling \\p rocsparse_csrsm_clear is optional. All\n  allocated resources will be cleared, when the opaque \\ref rocsparse_mat_info struct\n  is destroyed using rocsparse_destroy_mat_info().\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[inout]\n  info        structure that holds the information collected during the analysis step.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer holding the meta data could not\n              be deallocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_csrsm_clear( // single entry point with no value-typed arguments; documented above as optional cleanup
+        handle: rocsparse_handle, // rocsparse library context
+        info: rocsparse_mat_info, // in/out: structure whose csrsm analysis metadata is released
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse triangular system solve using CSR storage format\n\n  \\details\n  \\p rocsparse_csrsm_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR storage format, a dense solution matrix\n  \\f$X\\f$ and the right-hand side matrix \\f$B\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot op(X) = \\alpha \\cdot op(B),\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  ,\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(X) = \\left\\{\n    \\begin{array}{ll}\n        X,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        X^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        X^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\p rocsparse_csrsm_solve requires a user allocated temporary buffer. Its size is\n  returned by rocsparse_scsrsm_buffer_size(), rocsparse_dcsrsm_buffer_size(),\n  rocsparse_ccsrsm_buffer_size() or rocsparse_zcsrsm_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_scsrsm_analysis(),\n  rocsparse_dcsrsm_analysis(), rocsparse_ccsrsm_analysis() or\n  rocsparse_zcsrsm_analysis(). \\p rocsparse_csrsm_solve reports the first zero pivot\n  (either numerical or structural zero). 
The zero pivot status can be checked calling\n  rocsparse_csrsm_zero_pivot(). If\n  \\ref rocsparse_diag_type == \\ref rocsparse_diag_type_unit, no zero pivot will be\n  reported, even if \\f$A_{j,j} = 0\\f$ for some \\f$j\\f$.\n\n  \\note\n  The sparse CSR matrix has to be sorted. This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans_A != \\ref rocsparse_operation_conjugate_transpose and\n  \\p trans_B != \\ref rocsparse_operation_conjugate_transpose is supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans_A     matrix A operation type.\n  @param[in]\n  trans_B     matrix B operation type.\n  @param[in]\n  m           number of rows of the sparse CSR matrix A.\n  @param[in]\n  nrhs        number of columns of the dense matrix op(B).\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix A.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix A.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix A.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix A.\n  @param[inout]\n  B           array of \\p m \\f$\\times\\f$ \\p nrhs elements of the rhs matrix B.\n  @param[in]\n  ldb         leading dimension of rhs matrix B.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary 
storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p nrhs or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p descr, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p B, \\p info or \\p temp_buffer pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A == \\ref rocsparse_operation_conjugate_transpose,\n              \\p trans_B == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the lower triangular \\f$m \\times m\\f$ matrix \\f$L\\f$, stored in CSR\n  storage format with unit diagonal. 
The following example solves \\f$L \\cdot X = B\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor\n      rocsparse_mat_descr descr;\n      rocsparse_create_mat_descr(&descr);\n      rocsparse_set_mat_fill_mode(descr, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr, rocsparse_diag_type_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size;\n      rocsparse_dcsrsm_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nrhs,\n                                   nnz,\n                                   &alpha,\n                                   descr,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   B,\n                                   ldb,\n                                   info,\n                                   rocsparse_solve_policy_auto,\n                                   &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis step\n      rocsparse_dcsrsm_analysis(handle,\n                                rocsparse_operation_none,\n                                rocsparse_operation_none,\n                                m,\n                                nrhs,\n                                nnz,\n                                &alpha,\n                                descr,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n   
                             B,\n                                ldb,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Solve LX = B\n      rocsparse_dcsrsm_solve(handle,\n                             rocsparse_operation_none,\n                             rocsparse_operation_none,\n                             m,\n                             nrhs,\n                             nnz,\n                             &alpha,\n                             descr,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             B,\n                             ldb,\n                             info,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // No zero pivot should be found, with L having unit diagonal\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_scsrsm_solve(
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        m: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        B: *mut f32,
+        ldb: rocsparse_int,
+        info: rocsparse_mat_info,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcsrsm_solve( // f64 variant: signature mirrors rocsparse_scsrsm_solve; notes below follow that function's #[doc] text (assumed to cover this variant too - confirm upstream)
+        handle: rocsparse_handle, // rocsparse library context
+        trans_A: rocsparse_operation, // matrix A operation type
+        trans_B: rocsparse_operation, // matrix B operation type
+        m: rocsparse_int, // rows of the sparse CSR matrix A
+        nrhs: rocsparse_int, // columns of the dense matrix op(B)
+        nnz: rocsparse_int, // non-zero entries of A
+        alpha: *const f64, // scalar alpha
+        descr: rocsparse_mat_descr, // descriptor of A
+        csr_val: *const f64, // nnz values of A
+        csr_row_ptr: *const rocsparse_int, // m+1 row start offsets of A
+        csr_col_ind: *const rocsparse_int, // nnz column indices of A
+        B: *mut f64, // in/out: right-hand-side matrix B; mutable here, unlike in the buffer_size/analysis calls
+        ldb: rocsparse_int, // leading dimension of B
+        info: rocsparse_mat_info, // analysis metadata from the analysis step
+        policy: rocsparse_solve_policy, // documented value: rocsparse_solve_policy_auto
+        temp_buffer: *mut ::std::os::raw::c_void, // caller-allocated scratch buffer
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_float_complex member of the csrsm_solve family; the doc attribute lives on rocsparse_scsrsm_solve.
+    #[must_use] // Ignoring the returned rocsparse_status produces a compiler warning.
+    pub fn rocsparse_ccsrsm_solve( // Same parameter names and order as rocsparse_dcsrsm_solve; only the element type differs.
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        m: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        B: *mut rocsparse_float_complex, // *mut, unlike the CSR arrays; presumably overwritten with the solution - confirm in the parameter docs.
+        ldb: rocsparse_int,
+        info: rocsparse_mat_info,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_double_complex member of the csrsm_solve family; the doc attribute lives on rocsparse_scsrsm_solve.
+    #[must_use] // Ignoring the returned rocsparse_status produces a compiler warning.
+    pub fn rocsparse_zcsrsm_solve( // Same parameter names and order as rocsparse_dcsrsm_solve; only the element type differs.
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        m: rocsparse_int,
+        nrhs: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        B: *mut rocsparse_double_complex, // *mut, unlike the CSR arrays; presumably overwritten with the solution - confirm in the parameter docs.
+        ldb: rocsparse_int,
+        info: rocsparse_mat_info,
+        policy: rocsparse_solve_policy,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Sparse matrix dense matrix multiplication using GEneral BSR storage format\n\n  \\details\n  \\p rocsparse_gebsrmm multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$mb \\times kb\\f$\n  matrix \\f$A\\f$, defined in GEneral BSR storage format, and the dense \\f$k \\times n\\f$\n  matrix \\f$B\\f$ (where \\f$k = col_block\\_dim \\times kb\\f$) and adds the result to the dense\n  \\f$m \\times n\\f$ matrix \\f$C\\f$ (where \\f$m = row_block\\_dim \\times mb\\f$) that\n  is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot C,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  Currently, only \\p trans_A == \\ref rocsparse_operation_none is supported.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         the storage format of the blocks. Can be \\ref rocsparse_direction_row or \\ref rocsparse_direction_column.\n  @param[in]\n  trans_A     matrix \\f$A\\f$ operation type. Currently, only \\ref rocsparse_operation_none is supported.\n  @param[in]\n  trans_B     matrix \\f$B\\f$ operation type. 
Currently, only \\ref rocsparse_operation_none and rocsparse_operation_transpose\n              are supported.\n  @param[in]\n  mb          number of block rows of the sparse GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  n           number of columns of the dense matrix \\f$op(B)\\f$ and \\f$C\\f$.\n  @param[in]\n  kb          number of block columns of the sparse GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  descr       descriptor of the sparse GEneral BSR matrix \\f$A\\f$. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb*row_block_dim*col_block_dim elements of the sparse GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              GEneral BSR matrix \\f$A\\f$.\n  @param[in]\n  row_block_dim   row size of the blocks in the sparse GEneral BSR matrix.\n  @param[in]\n  col_block_dim   column size of the blocks in the sparse GEneral BSR matrix.\n  @param[in]\n  B           array of dimension \\f$ldb \\times n\\f$ (\\f$op(B) == B\\f$),\n              \\f$ldb \\times k\\f$ otherwise.\n  @param[in]\n  ldb         leading dimension of \\f$B\\f$, must be at least \\f$\\max{(1, k)}\\f$ (\\f$ op(B) == B\\f$) where \\f$k = col\\_block\\_dim \\times kb\\f$,\n  \\f$\\max{(1, n)}\\f$ otherwise.\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  C           array of dimension \\f$ldc \\times n\\f$.\n  @param[in]\n  ldc         leading dimension of \\f$C\\f$, must be at least \\f$\\max{(1, m)}\\f$ (\\f$ op(A) == A\\f$) where \\f$m = row\\_block\\_dim \\times mb\\f$,\n  \\f$\\max{(1, 
k)}\\f$ where \\f$k = col\\_block\\_dim \\times kb\\f$ otherwise.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p n, \\p kb, \\p nnzb, \\p ldb, \\p ldc, \\p row_block_dim\n              or \\p col_block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p alpha, \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p B, \\p beta or \\p C pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans_A != \\ref rocsparse_operation_none or\n              \\p trans_B == \\ref rocsparse_operation_conjugate_transpose or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example multiplies a GEneral BSR matrix with a dense matrix.\n  \\code{.c}\n      //     1 2 0 3 0 0\n      // A = 0 4 5 0 0 0\n      //     0 0 0 7 8 0\n      //     0 0 1 2 4 1\n\n      rocsparse_int row_block_dim = 2;\n      rocsparse_int col_block_dim = 3;\n      rocsparse_int mb   = 2;\n      rocsparse_int kb   = 2;\n      rocsparse_int nnzb = 4;\n      rocsparse_direction dir = rocsparse_direction_row;\n\n      bsr_row_ptr[mb+1]                 = {0, 2, 4};                                        // device memory\n      bsr_col_ind[nnzb]                 = {0, 1, 0, 1};                                     // device memory\n      bsr_val[nnzb*row_block_dim*col_block_dim] = {1, 2, 0, 0, 4, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 7, 8, 0, 2, 4, 1}; // device memory\n\n      // Set dimension n of B\n      rocsparse_int n = 64;\n      rocsparse_int m = mb * row_block_dim;\n      rocsparse_int k = kb * col_block_dim;\n\n      // Allocate and generate dense matrix B\n      std::vector<float> hB(k * n);\n      for(rocsparse_int i = 
0; i < k * n; ++i)\n      {\n          hB[i] = static_cast<float>(rand()) / RAND_MAX;\n      }\n\n      // Copy B to the device\n      float* B;\n      hipMalloc((void**)&B, sizeof(float) * k * n);\n      hipMemcpy(B, hB.data(), sizeof(float) * k * n, hipMemcpyHostToDevice);\n\n      // alpha and beta\n      float alpha = 1.0f;\n      float beta  = 0.0f;\n\n      // Allocate memory for the resulting matrix C\n      float* C;\n      hipMalloc((void**)&C, sizeof(float) * m * n);\n\n      // Perform the matrix multiplication\n      rocsparse_sgebsrmm(handle,\n                         dir,\n                         rocsparse_operation_none,\n                         rocsparse_operation_none,\n                         mb,\n                         n,\n                         kb,\n                         nnzb,\n                         &alpha,\n                         descr,\n                         bsr_val,\n                         bsr_row_ptr,\n                         bsr_col_ind,\n                         row_block_dim,\n                         col_block_dim,\n                         B,\n                         k,\n                         &beta,\n                         C,\n                         m);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_sgebsrmm( // f32 entry point; computes C := alpha * op(A) * op(B) + beta * C per the doc attribute above.
+        handle: rocsparse_handle,
+        dir: rocsparse_direction, // Storage format of the blocks (row or column).
+        trans_A: rocsparse_operation, // Documented as: only rocsparse_operation_none is supported.
+        trans_B: rocsparse_operation, // Documented as: none and transpose are supported.
+        mb: rocsparse_int, // Block rows of A.
+        n: rocsparse_int, // Columns of op(B) and C.
+        kb: rocsparse_int, // Block columns of A.
+        nnzb: rocsparse_int, // Non-zero blocks of A.
+        alpha: *const f32,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        B: *const f32, // Dense input matrix (@param[in]).
+        ldb: rocsparse_int,
+        beta: *const f32,
+        C: *mut f32, // Dense in/out matrix (@param[inout]).
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 variant of rocsparse_sgebsrmm, which carries the doc attribute for the gebsrmm family.
+    #[must_use] // Ignoring the returned rocsparse_status produces a compiler warning.
+    pub fn rocsparse_dgebsrmm( // C := alpha * op(A) * op(B) + beta * C with A in general BSR storage, per that doc.
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        mb: rocsparse_int,
+        n: rocsparse_int,
+        kb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const f64,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        B: *const f64, // Dense input matrix.
+        ldb: rocsparse_int,
+        beta: *const f64,
+        C: *mut f64, // Dense in/out matrix.
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_float_complex variant of rocsparse_sgebsrmm, which carries the doc attribute for the gebsrmm family.
+    #[must_use] // Ignoring the returned rocsparse_status produces a compiler warning.
+    pub fn rocsparse_cgebsrmm( // C := alpha * op(A) * op(B) + beta * C with A in general BSR storage, per that doc.
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        mb: rocsparse_int,
+        n: rocsparse_int,
+        kb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_float_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        B: *const rocsparse_float_complex, // Dense input matrix.
+        ldb: rocsparse_int,
+        beta: *const rocsparse_float_complex,
+        C: *mut rocsparse_float_complex, // Dense in/out matrix.
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_double_complex variant of rocsparse_sgebsrmm, which carries the doc attribute for the gebsrmm family.
+    #[must_use] // Ignoring the returned rocsparse_status produces a compiler warning.
+    pub fn rocsparse_zgebsrmm( // C := alpha * op(A) * op(B) + beta * C with A in general BSR storage, per that doc.
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        mb: rocsparse_int,
+        n: rocsparse_int,
+        kb: rocsparse_int,
+        nnzb: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        descr: rocsparse_mat_descr,
+        bsr_val: *const rocsparse_double_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        B: *const rocsparse_double_complex, // Dense input matrix.
+        ldb: rocsparse_int,
+        beta: *const rocsparse_double_complex,
+        C: *mut rocsparse_double_complex, // Dense in/out matrix.
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup level3_module\n  \\brief Dense matrix sparse matrix multiplication using CSR storage format\n\n  \\details\n  \\p rocsparse_gemmi multiplies the scalar \\f$\\alpha\\f$ with a dense \\f$m \\times k\\f$\n  matrix \\f$A\\f$ and the sparse \\f$k \\times n\\f$ matrix \\f$B\\f$, defined in CSR\n  storage format and adds the result to the dense \\f$m \\times n\\f$ matrix \\f$C\\f$ that\n  is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot C\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  trans_A     matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B     matrix \\f$B\\f$ operation type.\n  @param[in]\n  m           number of rows of the dense matrix \\f$A\\f$.\n  @param[in]\n  n           number of columns of the sparse CSR matrix \\f$op(B)\\f$ and \\f$C\\f$.\n  @param[in]\n  k           number of columns of the dense matrix \\f$A\\f$.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  alpha       scalar 
\\f$\\alpha\\f$.\n  @param[in]\n  A           array of dimension \\f$lda \\times k\\f$ (\\f$op(A) == A\\f$) or\n              \\f$lda \\times m\\f$ (\\f$op(A) == A^T\\f$ or \\f$op(A) == A^H\\f$).\n  @param[in]\n  lda         leading dimension of \\f$A\\f$, must be at least \\f$m\\f$\n              (\\f$op(A) == A\\f$) or \\f$k\\f$ (\\f$op(A) == A^T\\f$ or\n              \\f$op(A) == A^H\\f$).\n  @param[in]\n  descr       descriptor of the sparse CSR matrix \\f$B\\f$. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix \\f$B\\f$.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse CSR\n              matrix \\f$B\\f$.\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  C           array of dimension \\f$ldc \\times n\\f$ that holds the values of \\f$C\\f$.\n  @param[in]\n  ldc         leading dimension of \\f$C\\f$, must be at least \\f$m\\f$.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n, \\p k, \\p nnz, \\p lda or \\p ldc\n              is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p alpha, \\p A, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p beta or \\p C pointer is invalid.\n\n  \\par Example\n  This example multiplies a dense matrix with a CSC matrix.\n  \\code{.c}\n      rocsparse_int m   = 2;\n      rocsparse_int n   = 5;\n      rocsparse_int k   = 3;\n      rocsparse_int nnz = 8;\n      rocsparse_int lda = m;\n      rocsparse_int ldc = m;\n\n      // Matrix A (m x k)\n      // (  9.0  10.0  11.0 )\n      // ( 12.0  13.0  14.0 
)\n\n      // Matrix B (k x n)\n      // ( 1.0  2.0  0.0  3.0  0.0 )\n      // ( 0.0  4.0  5.0  0.0  0.0 )\n      // ( 6.0  0.0  0.0  7.0  8.0 )\n\n      // Matrix C (m x n)\n      // ( 15.0  16.0  17.0  18.0  19.0 )\n      // ( 20.0  21.0  22.0  23.0  24.0 )\n\n      A[lda * k]           = {9.0, 12.0, 10.0, 13.0, 11.0, 14.0};      // device memory\n      csc_col_ptr_B[n + 1] = {0, 2, 4, 5, 7, 8};                       // device memory\n      csc_row_ind_B[nnz]   = {0, 0, 1, 1, 2, 3, 3, 4};                 // device memory\n      csc_val_B[nnz]       = {1.0, 6.0, 2.0, 4.0, 5.0, 3.0, 7.0, 8.0}; // device memory\n      C[ldc * n]           = {15.0, 20.0, 16.0, 21.0, 17.0, 22.0,      // device memory\n                              18.0, 23.0, 19.0, 24.0};\n\n      // alpha and beta\n      float alpha = 1.0f;\n      float beta  = 0.0f;\n\n      // Perform the matrix multiplication\n      rocsparse_sgemmi(handle,\n                       rocsparse_operation_none,\n                       rocsparse_operation_transpose,\n                       m,\n                       n,\n                       k,\n                       nnz,\n                       &alpha,\n                       A,\n                       lda,\n                       descr_B,\n                       csc_val_B,\n                       csc_col_ptr_B,\n                       csc_row_ind_B,\n                       &beta,\n                       C,\n                       ldc);\n  \\endcode\n/\n/**@{"]
+    pub fn rocsparse_sgemmi( // f32 entry point; computes C := alpha * op(A) * op(B) + beta * C per the doc attribute above.
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        m: rocsparse_int, // Rows of the dense matrix A.
+        n: rocsparse_int, // Columns of op(B) and C.
+        k: rocsparse_int, // Columns of the dense matrix A.
+        nnz: rocsparse_int, // Non-zero entries of the sparse matrix B.
+        alpha: *const f32,
+        A: *const f32, // Dense input matrix (@param[in]).
+        lda: rocsparse_int, // Leading dimension of A.
+        descr: rocsparse_mat_descr, // Descriptor of the sparse matrix B.
+        csr_val: *const f32, // csr_* arrays describe the sparse matrix B.
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        beta: *const f32,
+        C: *mut f32, // Dense in/out matrix (@param[inout]).
+        ldc: rocsparse_int, // Leading dimension of C.
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 variant of rocsparse_sgemmi, which carries the doc attribute for the gemmi family.
+    #[must_use] // Ignoring the returned rocsparse_status produces a compiler warning.
+    pub fn rocsparse_dgemmi( // C := alpha * op(A) * op(B) + beta * C with dense A and sparse CSR B, per that doc.
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        k: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const f64,
+        A: *const f64, // Dense input matrix.
+        lda: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const f64, // csr_* arrays describe the sparse matrix B.
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        beta: *const f64,
+        C: *mut f64, // Dense in/out matrix.
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_float_complex variant of rocsparse_sgemmi, which carries the doc attribute for the gemmi family.
+    #[must_use] // Ignoring the returned rocsparse_status produces a compiler warning.
+    pub fn rocsparse_cgemmi( // C := alpha * op(A) * op(B) + beta * C with dense A and sparse CSR B, per that doc.
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        k: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const rocsparse_float_complex,
+        A: *const rocsparse_float_complex, // Dense input matrix.
+        lda: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_float_complex, // csr_* arrays describe the sparse matrix B.
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        beta: *const rocsparse_float_complex,
+        C: *mut rocsparse_float_complex, // Dense in/out matrix.
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_double_complex variant of rocsparse_sgemmi, which carries the doc attribute for the gemmi family.
+    #[must_use] // Ignoring the returned rocsparse_status produces a compiler warning.
+    pub fn rocsparse_zgemmi( // C := alpha * op(A) * op(B) + beta * C with dense A and sparse CSR B, per that doc.
+        handle: rocsparse_handle,
+        trans_A: rocsparse_operation,
+        trans_B: rocsparse_operation,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        k: rocsparse_int,
+        nnz: rocsparse_int,
+        alpha: *const rocsparse_double_complex,
+        A: *const rocsparse_double_complex, // Dense input matrix.
+        lda: rocsparse_int,
+        descr: rocsparse_mat_descr,
+        csr_val: *const rocsparse_double_complex, // csr_* arrays describe the sparse matrix B.
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        beta: *const rocsparse_double_complex,
+        C: *mut rocsparse_double_complex, // Dense in/out matrix.
+        ldc: rocsparse_int,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup precond_module\n  \\brief Incomplete Cholesky factorization with 0 fill-ins and no pivoting using BSR\n  storage format\n\n  \\details\n  \\p rocsparse_bsric0_zero_pivot returns \\ref rocsparse_status_zero_pivot, if either a\n  structural or numerical zero has been found during rocsparse_sbsric0(),\n  rocsparse_dbsric0(), rocsparse_cbsric0() or rocsparse_zbsric0() computation.\n  The first zero pivot \\f$j\\f$ at \\f$A_{j,j}\\f$ is stored in \\p position, using same\n  index base as the BSR matrix.\n\n  \\p position can be in host or device memory. If no zero pivot has been found,\n  \\p position is set to -1 and \\ref rocsparse_status_success is returned instead.\n\n  \\note\n  If a zero pivot is found, \\p position=j means that either the diagonal block \\p A(j,j)\n  is missing (structural zero) or the diagonal block \\p A(j,j) is not positive definite\n  (numerical zero).\n\n  \\note \\p rocsparse_bsric0_zero_pivot is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to zero pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_zero_pivot zero pivot has been found."]
@@ -6444,7 +8628,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Incomplete Cholesky factorization with 0 fill-ins and no pivoting using BSR\n  storage format\n\n  \\details\n  \\p rocsparse_bsric0 computes the incomplete Cholesky factorization with 0 fill-ins\n  and no pivoting of a sparse \\f$mb \\times mb\\f$ BSR matrix \\f$A\\f$, such that\n  \\f[\n    A \\approx LL^T\n  \\f]\n\n  \\p rocsparse_bsric0 requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_sbsric0_buffer_size(), rocsparse_dbsric0_buffer_size(),\n  rocsparse_cbsric0_buffer_size() or rocsparse_zbsric0_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_sbsric0_analysis(),\n  rocsparse_dbsric0_analysis(), rocsparse_cbsric0_analysis() or rocsparse_zbsric0_analysis().\n  \\p rocsparse_bsric0 reports the first zero pivot (either numerical or structural zero).\n  The zero pivot status can be obtained by calling rocsparse_bsric0_zero_pivot().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir             direction that specified whether to count nonzero elements by \\ref rocsparse_direction_row or by\n              \\ref rocsparse_direction_row.\n  @param[in]\n  mb          number of block rows in the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero block entries of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix.\n  @param[inout]\n  bsr_val     array of length \\p nnzb*block_dim*block_dim containing the values of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse BSR 
matrix.\n  @param[in]\n  block_dim   the block dimension of the BSR matrix. Between 1 and m where \\p m=mb*block_dim.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nnzb, or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val, \\p bsr_row_ptr\n              or \\p bsr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the sparse \\f$m \\times m\\f$ matrix \\f$A\\f$, stored in BSR\n  storage format. 
The following example computes the incomplete Cholesky factorization\n  \\f$M \\approx LL^T\\f$ and solves the preconditioned system \\f$My = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor for M\n      rocsparse_mat_descr descr_M;\n      rocsparse_create_mat_descr(&descr_M);\n\n      // Create matrix descriptor for L\n      rocsparse_mat_descr descr_L;\n      rocsparse_create_mat_descr(&descr_L);\n      rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit);\n\n      // Create matrix descriptor for L'\n      rocsparse_mat_descr descr_Lt;\n      rocsparse_create_mat_descr(&descr_Lt);\n      rocsparse_set_mat_fill_mode(descr_Lt, rocsparse_fill_mode_upper);\n      rocsparse_set_mat_diag_type(descr_Lt, rocsparse_diag_type_non_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size_M;\n      size_t buffer_size_L;\n      size_t buffer_size_Lt;\n      rocsparse_dbsric0_buffer_size(handle,\n                                     rocsparse_direction_row,\n                                     mb,\n                                     nnzb,\n                                     descr_M,\n                                     bsr_val,\n                                     bsr_row_ptr,\n                                     bsr_col_ind,\n                                     block_dim,\n                                     info,\n                                     &buffer_size_M);\n      rocsparse_dbsrsv_buffer_size(handle,\n                                   rocsparse_direction_row,\n                                   rocsparse_operation_none,\n                                   mb,\n                                   nnzb,\n                                 
  descr_L,\n                                   bsr_val,\n                                   bsr_row_ptr,\n                                   bsr_col_ind,\n                                   block_dim,\n                                   info,\n                                   &buffer_size_L);\n      rocsparse_dbsrsv_buffer_size(handle,\n                                   rocsparse_direction_row,\n                                   rocsparse_operation_transpose,\n                                   mb,\n                                   nnzb,\n                                   descr_Lt,\n                                   bsr_val,\n                                   bsr_row_ptr,\n                                   bsr_col_ind,\n                                   block_dim,\n                                   info,\n                                   &buffer_size_Lt);\n\n      size_t buffer_size = max(buffer_size_M, max(buffer_size_L, buffer_size_Lt));\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis steps, using rocsparse_analysis_policy_reuse to improve\n      // computation performance\n      rocsparse_dbsric0_analysis(handle,\n                                  rocsparse_direction_row,\n                                  mb,\n                                  nnzb,\n                                  descr_M,\n                                  bsr_val,\n                                  bsr_row_ptr,\n                                  bsr_col_ind,\n                                  block_dim,\n                                  info,\n                                  rocsparse_analysis_policy_reuse,\n                                  rocsparse_solve_policy_auto,\n                                  temp_buffer);\n      rocsparse_dbsrsv_analysis(handle,\n                                rocsparse_direction_row,\n                                rocsparse_operation_none,\n           
                     mb,\n                                nnzb,\n                                descr_L,\n                                bsr_val,\n                                bsr_row_ptr,\n                                bsr_col_ind,\n                                block_dim,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n      rocsparse_dbsrsv_analysis(handle,\n                                rocsparse_direction_row,\n                                rocsparse_operation_transpose,\n                                mb,\n                                nnzb,\n                                descr_Lt,\n                                bsr_val,\n                                bsr_row_ptr,\n                                bsr_col_ind,\n                                block_dim,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Check for zero pivot\n      rocsparse_int position;\n      if(rocsparse_status_zero_pivot == rocsparse_bsric0_zero_pivot(handle,\n                                                                    info,\n                                                                    &position))\n      {\n          printf(\"A has structural zero at A(%d,%d)\\n\", position, position);\n      }\n\n      // Compute incomplete Cholesky factorization M = LL'\n      rocsparse_dbsric0(handle,\n                         rocsparse_direction_row,\n                         mb,\n                         nnzb,\n                         descr_M,\n                         bsr_val,\n                         bsr_row_ptr,\n                         bsr_col_ind,\n                         block_dim,\n                         info,\n      
                   rocsparse_solve_policy_auto,\n                         temp_buffer);\n\n      // Check for zero pivot\n      if(rocsparse_status_zero_pivot == rocsparse_bsric0_zero_pivot(handle,\n                                                                     info,\n                                                                     &position))\n      {\n          printf(\"L has structural and/or numerical zero at L(%d,%d)\\n\",\n                 position,\n                 position);\n      }\n\n      // Solve Lz = x\n      rocsparse_dbsrsv_solve(handle,\n                             rocsparse_direction_row,\n                             rocsparse_operation_none,\n                             mb,\n                             nnzb,\n                             &alpha,\n                             descr_L,\n                             bsr_val,\n                             bsr_row_ptr,\n                             bsr_col_ind,\n                             block_dim,\n                             info,\n                             x,\n                             z,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Solve L'y = z\n      rocsparse_dbsrsv_solve(handle,\n                             rocsparse_direction_row,\n                             rocsparse_operation_transpose,\n                             mb,\n                             nnzb,\n                             &alpha,\n                             descr_Lt,\n                             bsr_val,\n                             bsr_row_ptr,\n                             bsr_col_ind,\n                             block_dim,\n                             info,\n                             z,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      
rocsparse_destroy_mat_descr(descr_M);\n      rocsparse_destroy_mat_descr(descr_L);\n      rocsparse_destroy_mat_descr(descr_Lt);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
+    #[doc = " \\ingroup precond_module\n  \\brief Incomplete Cholesky factorization with 0 fill-ins and no pivoting using BSR\n  storage format\n\n  \\details\n  \\p rocsparse_bsric0 computes the incomplete Cholesky factorization with 0 fill-ins\n  and no pivoting of a sparse \\f$mb \\times mb\\f$ BSR matrix \\f$A\\f$, such that\n  \\f[\n    A \\approx LL^T\n  \\f]\n\n  \\p rocsparse_bsric0 requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_sbsric0_buffer_size(), rocsparse_dbsric0_buffer_size(),\n  rocsparse_cbsric0_buffer_size() or rocsparse_zbsric0_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_sbsric0_analysis(),\n  rocsparse_dbsric0_analysis(), rocsparse_cbsric0_analysis() or rocsparse_zbsric0_analysis().\n  \\p rocsparse_bsric0 reports the first zero pivot (either numerical or structural zero).\n  The zero pivot status can be obtained by calling rocsparse_bsric0_zero_pivot().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir             direction that specified whether to count nonzero elements by \\ref rocsparse_direction_row or by\n              \\ref rocsparse_direction_column.\n  @param[in]\n  mb          number of block rows in the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero block entries of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix.\n  @param[inout]\n  bsr_val     array of length \\p nnzb*block_dim*block_dim containing the values of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb 
elements containing the block column indices of the sparse BSR matrix.\n  @param[in]\n  block_dim   the block dimension of the BSR matrix. Between 1 and m where \\p m=mb*block_dim.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nnzb, or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val, \\p bsr_row_ptr\n              or \\p bsr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the sparse \\f$m \\times m\\f$ matrix \\f$A\\f$, stored in BSR\n  storage format. 
The following example computes the incomplete Cholesky factorization\n  \\f$M \\approx LL^T\\f$ and solves the preconditioned system \\f$My = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor for M\n      rocsparse_mat_descr descr_M;\n      rocsparse_create_mat_descr(&descr_M);\n\n      // Create matrix descriptor for L\n      rocsparse_mat_descr descr_L;\n      rocsparse_create_mat_descr(&descr_L);\n      rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit);\n\n      // Create matrix descriptor for L'\n      rocsparse_mat_descr descr_Lt;\n      rocsparse_create_mat_descr(&descr_Lt);\n      rocsparse_set_mat_fill_mode(descr_Lt, rocsparse_fill_mode_upper);\n      rocsparse_set_mat_diag_type(descr_Lt, rocsparse_diag_type_non_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size_M;\n      size_t buffer_size_L;\n      size_t buffer_size_Lt;\n      rocsparse_dbsric0_buffer_size(handle,\n                                     rocsparse_direction_row,\n                                     mb,\n                                     nnzb,\n                                     descr_M,\n                                     bsr_val,\n                                     bsr_row_ptr,\n                                     bsr_col_ind,\n                                     block_dim,\n                                     info,\n                                     &buffer_size_M);\n      rocsparse_dbsrsv_buffer_size(handle,\n                                   rocsparse_direction_row,\n                                   rocsparse_operation_none,\n                                   mb,\n                                   nnzb,\n                                 
  descr_L,\n                                   bsr_val,\n                                   bsr_row_ptr,\n                                   bsr_col_ind,\n                                   block_dim,\n                                   info,\n                                   &buffer_size_L);\n      rocsparse_dbsrsv_buffer_size(handle,\n                                   rocsparse_direction_row,\n                                   rocsparse_operation_transpose,\n                                   mb,\n                                   nnzb,\n                                   descr_Lt,\n                                   bsr_val,\n                                   bsr_row_ptr,\n                                   bsr_col_ind,\n                                   block_dim,\n                                   info,\n                                   &buffer_size_Lt);\n\n      size_t buffer_size = max(buffer_size_M, max(buffer_size_L, buffer_size_Lt));\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis steps, using rocsparse_analysis_policy_reuse to improve\n      // computation performance\n      rocsparse_dbsric0_analysis(handle,\n                                  rocsparse_direction_row,\n                                  mb,\n                                  nnzb,\n                                  descr_M,\n                                  bsr_val,\n                                  bsr_row_ptr,\n                                  bsr_col_ind,\n                                  block_dim,\n                                  info,\n                                  rocsparse_analysis_policy_reuse,\n                                  rocsparse_solve_policy_auto,\n                                  temp_buffer);\n      rocsparse_dbsrsv_analysis(handle,\n                                rocsparse_direction_row,\n                                rocsparse_operation_none,\n           
                     mb,\n                                nnzb,\n                                descr_L,\n                                bsr_val,\n                                bsr_row_ptr,\n                                bsr_col_ind,\n                                block_dim,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n      rocsparse_dbsrsv_analysis(handle,\n                                rocsparse_direction_row,\n                                rocsparse_operation_transpose,\n                                mb,\n                                nnzb,\n                                descr_Lt,\n                                bsr_val,\n                                bsr_row_ptr,\n                                bsr_col_ind,\n                                block_dim,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Check for zero pivot\n      rocsparse_int position;\n      if(rocsparse_status_zero_pivot == rocsparse_bsric0_zero_pivot(handle,\n                                                                    info,\n                                                                    &position))\n      {\n          printf(\"A has structural zero at A(%d,%d)\\n\", position, position);\n      }\n\n      // Compute incomplete Cholesky factorization M = LL'\n      rocsparse_dbsric0(handle,\n                         rocsparse_direction_row,\n                         mb,\n                         nnzb,\n                         descr_M,\n                         bsr_val,\n                         bsr_row_ptr,\n                         bsr_col_ind,\n                         block_dim,\n                         info,\n      
                   rocsparse_solve_policy_auto,\n                         temp_buffer);\n\n      // Check for zero pivot\n      if(rocsparse_status_zero_pivot == rocsparse_bsric0_zero_pivot(handle,\n                                                                     info,\n                                                                     &position))\n      {\n          printf(\"L has structural and/or numerical zero at L(%d,%d)\\n\",\n                 position,\n                 position);\n      }\n\n      // Solve Lz = x\n      rocsparse_dbsrsv_solve(handle,\n                             rocsparse_direction_row,\n                             rocsparse_operation_none,\n                             mb,\n                             nnzb,\n                             &alpha,\n                             descr_L,\n                             bsr_val,\n                             bsr_row_ptr,\n                             bsr_col_ind,\n                             block_dim,\n                             info,\n                             x,\n                             z,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Solve L'y = z\n      rocsparse_dbsrsv_solve(handle,\n                             rocsparse_direction_row,\n                             rocsparse_operation_transpose,\n                             mb,\n                             nnzb,\n                             &alpha,\n                             descr_Lt,\n                             bsr_val,\n                             bsr_row_ptr,\n                             bsr_col_ind,\n                             block_dim,\n                             info,\n                             z,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      
rocsparse_destroy_mat_descr(descr_M);\n      rocsparse_destroy_mat_descr(descr_L);\n      rocsparse_destroy_mat_descr(descr_Lt);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
     pub fn rocsparse_sbsric0(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
@@ -6729,7 +8913,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Incomplete LU factorization with 0 fill-ins and no pivoting using BSR storage\n  format\n\n  \\details\n  \\p rocsparse_bsrilu0 computes the incomplete LU factorization with 0 fill-ins and no\n  pivoting of a sparse \\f$mb \\times mb\\f$ BSR matrix \\f$A\\f$, such that\n  \\f[\n    A \\approx LU\n  \\f]\n\n  \\p rocsparse_bsrilu0 requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_sbsrilu0_buffer_size(), rocsparse_dbsrilu0_buffer_size(),\n  rocsparse_cbsrilu0_buffer_size() or rocsparse_zbsrilu0_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_sbsrilu0_analysis(),\n  rocsparse_dbsrilu0_analysis(), rocsparse_cbsrilu0_analysis() or\n  rocsparse_zbsrilu0_analysis(). \\p rocsparse_bsrilu0 reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be obtained by\n  calling rocsparse_bsrilu0_zero_pivot().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         direction that specified whether to count nonzero elements by\n              \\ref rocsparse_direction_row or by \\ref rocsparse_direction_row.\n  @param[in]\n  mb          number of block rows in the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero block entries of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix.\n  @param[inout]\n  bsr_val     array of length \\p nnzb*block_dim*block_dim containing the values of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse BSR matrix.\n 
 @param[in]\n  block_dim   the block dimension of the BSR matrix. Between 1 and m where \\p m=mb*block_dim.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nnzb, or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val, \\p bsr_row_ptr\n              or \\p bsr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the sparse \\f$m \\times m\\f$ matrix \\f$A\\f$, stored in BSR\n  storage format. 
The following example computes the incomplete LU factorization\n  \\f$M \\approx LU\\f$ and solves the preconditioned system \\f$My = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor for M\n      rocsparse_mat_descr descr_M;\n      rocsparse_create_mat_descr(&descr_M);\n\n      // Create matrix descriptor for L\n      rocsparse_mat_descr descr_L;\n      rocsparse_create_mat_descr(&descr_L);\n      rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit);\n\n      // Create matrix descriptor for U\n      rocsparse_mat_descr descr_U;\n      rocsparse_create_mat_descr(&descr_U);\n      rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper);\n      rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size_M;\n      size_t buffer_size_L;\n      size_t buffer_size_U;\n      rocsparse_dbsrilu0_buffer_size(handle,\n                                     rocsparse_direction_row,\n                                     mb,\n                                     nnzb,\n                                     descr_M,\n                                     bsr_val,\n                                     bsr_row_ptr,\n                                     bsr_col_ind,\n                                     block_dim,\n                                     info,\n                                     &buffer_size_M);\n      rocsparse_dbsrsv_buffer_size(handle,\n                                   rocsparse_direction_row,\n                                   rocsparse_operation_none,\n                                   mb,\n                                   nnzb,\n                                   descr_L,\n 
                                  bsr_val,\n                                   bsr_row_ptr,\n                                   bsr_col_ind,\n                                   block_dim,\n                                   info,\n                                   &buffer_size_L);\n      rocsparse_dbsrsv_buffer_size(handle,\n                                   rocsparse_direction_row,\n                                   rocsparse_operation_none,\n                                   mb,\n                                   nnzb,\n                                   descr_U,\n                                   bsr_val,\n                                   bsr_row_ptr,\n                                   bsr_col_ind,\n                                   block_dim,\n                                   info,\n                                   &buffer_size_U);\n\n      size_t buffer_size = max(buffer_size_M, max(buffer_size_L, buffer_size_U));\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis steps, using rocsparse_analysis_policy_reuse to improve\n      // computation performance\n      rocsparse_dbsrilu0_analysis(handle,\n                                  rocsparse_direction_row,\n                                  mb,\n                                  nnzb,\n                                  descr_M,\n                                  bsr_val,\n                                  bsr_row_ptr,\n                                  bsr_col_ind,\n                                  block_dim,\n                                  info,\n                                  rocsparse_analysis_policy_reuse,\n                                  rocsparse_solve_policy_auto,\n                                  temp_buffer);\n      rocsparse_dbsrsv_analysis(handle,\n                                rocsparse_direction_row,\n                                rocsparse_operation_none,\n                               
 mb,\n                                nnzb,\n                                descr_L,\n                                bsr_val,\n                                bsr_row_ptr,\n                                bsr_col_ind,\n                                block_dim,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n      rocsparse_dbsrsv_analysis(handle,\n                                rocsparse_direction_row,\n                                rocsparse_operation_none,\n                                mb,\n                                nnzb,\n                                descr_U,\n                                bsr_val,\n                                bsr_row_ptr,\n                                bsr_col_ind,\n                                block_dim,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Check for zero pivot\n      rocsparse_int position;\n      if(rocsparse_status_zero_pivot == rocsparse_bsrilu0_zero_pivot(handle,\n                                                                    info,\n                                                                    &position))\n      {\n          printf(\"A has structural zero at A(%d,%d)\\n\", position, position);\n      }\n\n      // Compute incomplete LU factorization M = LU\n      rocsparse_dbsrilu0(handle,\n                         rocsparse_direction_row,\n                         mb,\n                         nnzb,\n                         descr_M,\n                         bsr_val,\n                         bsr_row_ptr,\n                         bsr_col_ind,\n                         block_dim,\n                         info,\n                         
rocsparse_solve_policy_auto,\n                         temp_buffer);\n\n      // Check for zero pivot\n      if(rocsparse_status_zero_pivot == rocsparse_bsrilu0_zero_pivot(handle,\n                                                                     info,\n                                                                     &position))\n      {\n          printf(\"L has structural and/or numerical zero at L(%d,%d)\\n\",\n                 position,\n                 position);\n      }\n\n      // Solve Lz = x\n      rocsparse_dbsrsv_solve(handle,\n                             rocsparse_direction_row,\n                             rocsparse_operation_none,\n                             mb,\n                             nnzb,\n                             &alpha,\n                             descr_L,\n                             bsr_val,\n                             bsr_row_ptr,\n                             bsr_col_ind,\n                             block_dim,\n                             info,\n                             x,\n                             z,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Solve Uy = z\n      rocsparse_dbsrsv_solve(handle,\n                             rocsparse_direction_row,\n                             rocsparse_operation_none,\n                             mb,\n                             nnzb,\n                             &alpha,\n                             descr_U,\n                             bsr_val,\n                             bsr_row_ptr,\n                             bsr_col_ind,\n                             block_dim,\n                             info,\n                             z,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      
rocsparse_destroy_mat_descr(descr_M);\n      rocsparse_destroy_mat_descr(descr_L);\n      rocsparse_destroy_mat_descr(descr_U);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
+    #[doc = " \\ingroup precond_module\n  \\brief Incomplete LU factorization with 0 fill-ins and no pivoting using BSR storage\n  format\n\n  \\details\n  \\p rocsparse_bsrilu0 computes the incomplete LU factorization with 0 fill-ins and no\n  pivoting of a sparse \\f$mb \\times mb\\f$ BSR matrix \\f$A\\f$, such that\n  \\f[\n    A \\approx LU\n  \\f]\n\n  \\p rocsparse_bsrilu0 requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_sbsrilu0_buffer_size(), rocsparse_dbsrilu0_buffer_size(),\n  rocsparse_cbsrilu0_buffer_size() or rocsparse_zbsrilu0_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_sbsrilu0_analysis(),\n  rocsparse_dbsrilu0_analysis(), rocsparse_cbsrilu0_analysis() or\n  rocsparse_zbsrilu0_analysis(). \\p rocsparse_bsrilu0 reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be obtained by\n  calling rocsparse_bsrilu0_zero_pivot().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         direction that specified whether to count nonzero elements by\n              \\ref rocsparse_direction_row or by \\ref rocsparse_direction_column.\n  @param[in]\n  mb          number of block rows in the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero block entries of the sparse BSR matrix.\n  @param[in]\n  descr       descriptor of the sparse BSR matrix.\n  @param[inout]\n  bsr_val     array of length \\p nnzb*block_dim*block_dim containing the values of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb 
elements containing the block column indices of the sparse BSR matrix.\n  @param[in]\n  block_dim   the block dimension of the BSR matrix. Between 1 and m where \\p m=mb*block_dim.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nnzb, or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p bsr_val, \\p bsr_row_ptr\n              or \\p bsr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the sparse \\f$m \\times m\\f$ matrix \\f$A\\f$, stored in BSR\n  storage format. 
The following example computes the incomplete LU factorization\n  \\f$M \\approx LU\\f$ and solves the preconditioned system \\f$My = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor for M\n      rocsparse_mat_descr descr_M;\n      rocsparse_create_mat_descr(&descr_M);\n\n      // Create matrix descriptor for L\n      rocsparse_mat_descr descr_L;\n      rocsparse_create_mat_descr(&descr_L);\n      rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit);\n\n      // Create matrix descriptor for U\n      rocsparse_mat_descr descr_U;\n      rocsparse_create_mat_descr(&descr_U);\n      rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper);\n      rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size_M;\n      size_t buffer_size_L;\n      size_t buffer_size_U;\n      rocsparse_dbsrilu0_buffer_size(handle,\n                                     rocsparse_direction_row,\n                                     mb,\n                                     nnzb,\n                                     descr_M,\n                                     bsr_val,\n                                     bsr_row_ptr,\n                                     bsr_col_ind,\n                                     block_dim,\n                                     info,\n                                     &buffer_size_M);\n      rocsparse_dbsrsv_buffer_size(handle,\n                                   rocsparse_direction_row,\n                                   rocsparse_operation_none,\n                                   mb,\n                                   nnzb,\n                                   descr_L,\n 
                                  bsr_val,\n                                   bsr_row_ptr,\n                                   bsr_col_ind,\n                                   block_dim,\n                                   info,\n                                   &buffer_size_L);\n      rocsparse_dbsrsv_buffer_size(handle,\n                                   rocsparse_direction_row,\n                                   rocsparse_operation_none,\n                                   mb,\n                                   nnzb,\n                                   descr_U,\n                                   bsr_val,\n                                   bsr_row_ptr,\n                                   bsr_col_ind,\n                                   block_dim,\n                                   info,\n                                   &buffer_size_U);\n\n      size_t buffer_size = max(buffer_size_M, max(buffer_size_L, buffer_size_U));\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis steps, using rocsparse_analysis_policy_reuse to improve\n      // computation performance\n      rocsparse_dbsrilu0_analysis(handle,\n                                  rocsparse_direction_row,\n                                  mb,\n                                  nnzb,\n                                  descr_M,\n                                  bsr_val,\n                                  bsr_row_ptr,\n                                  bsr_col_ind,\n                                  block_dim,\n                                  info,\n                                  rocsparse_analysis_policy_reuse,\n                                  rocsparse_solve_policy_auto,\n                                  temp_buffer);\n      rocsparse_dbsrsv_analysis(handle,\n                                rocsparse_direction_row,\n                                rocsparse_operation_none,\n                               
 mb,\n                                nnzb,\n                                descr_L,\n                                bsr_val,\n                                bsr_row_ptr,\n                                bsr_col_ind,\n                                block_dim,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n      rocsparse_dbsrsv_analysis(handle,\n                                rocsparse_direction_row,\n                                rocsparse_operation_none,\n                                mb,\n                                nnzb,\n                                descr_U,\n                                bsr_val,\n                                bsr_row_ptr,\n                                bsr_col_ind,\n                                block_dim,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Check for zero pivot\n      rocsparse_int position;\n      if(rocsparse_status_zero_pivot == rocsparse_bsrilu0_zero_pivot(handle,\n                                                                    info,\n                                                                    &position))\n      {\n          printf(\"A has structural zero at A(%d,%d)\\n\", position, position);\n      }\n\n      // Compute incomplete LU factorization M = LU\n      rocsparse_dbsrilu0(handle,\n                         rocsparse_direction_row,\n                         mb,\n                         nnzb,\n                         descr_M,\n                         bsr_val,\n                         bsr_row_ptr,\n                         bsr_col_ind,\n                         block_dim,\n                         info,\n                         
rocsparse_solve_policy_auto,\n                         temp_buffer);\n\n      // Check for zero pivot\n      if(rocsparse_status_zero_pivot == rocsparse_bsrilu0_zero_pivot(handle,\n                                                                     info,\n                                                                     &position))\n      {\n          printf(\"L has structural and/or numerical zero at L(%d,%d)\\n\",\n                 position,\n                 position);\n      }\n\n      // Solve Lz = x\n      rocsparse_dbsrsv_solve(handle,\n                             rocsparse_direction_row,\n                             rocsparse_operation_none,\n                             mb,\n                             nnzb,\n                             &alpha,\n                             descr_L,\n                             bsr_val,\n                             bsr_row_ptr,\n                             bsr_col_ind,\n                             block_dim,\n                             info,\n                             x,\n                             z,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Solve Uy = z\n      rocsparse_dbsrsv_solve(handle,\n                             rocsparse_direction_row,\n                             rocsparse_operation_none,\n                             mb,\n                             nnzb,\n                             &alpha,\n                             descr_U,\n                             bsr_val,\n                             bsr_row_ptr,\n                             bsr_col_ind,\n                             block_dim,\n                             info,\n                             z,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      
rocsparse_destroy_mat_descr(descr_M);\n      rocsparse_destroy_mat_descr(descr_L);\n      rocsparse_destroy_mat_descr(descr_U);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
     pub fn rocsparse_sbsrilu0(
         handle: rocsparse_handle,
         dir: rocsparse_direction,
@@ -6805,6 +8989,33 @@ extern "C" {
         position: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup precond_module\n  \\brief Incomplete Cholesky factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csric0_singular_pivot() returns the position of a\n  numerical singular pivot (where \\f$|L_{j,j}| \\leq \\text{tolerance}\\f$)\n  that has been found during rocsparse_scsric0() or\n  rocsparse_dcsric0() computation. The first singular pivot \\f$j\\f$ at \\f$L_{j,j}\\f$\n  is stored in \\p position, using the same index base as the CSR matrix.\n\n  \\p position can be in host or device memory. If no singular pivot has been found,\n  \\p position is set to -1.\n\n  \\note \\p rocsparse_csric0_singular_pivot() is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to singular pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_csric0_singular_pivot(
+        handle: rocsparse_handle,
+        info: rocsparse_mat_info,
+        position: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup precond_module\n  \\brief Incomplete Cholesky factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csric0_set_tolerance()  sets the numerical tolerance for detecting a\n  numerical singular pivot (where \\f$|L_{j,j}|  \\leq \\text{tolerance}\\f$)\n  that might be found during rocsparse_scsric0() or\n  rocsparse_dcsric0()  computation.\n\n\n  \\note \\p rocsparse_csric0_set_tolerance() is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  tolerance    tolerance for detecting singular pivot (\\f$|L_{j,j}|  \\leq \\text{tolerance}\\f$)\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer if \\p info pointer is\n              invalid"]
+    pub fn rocsparse_csric0_set_tolerance(
+        handle: rocsparse_handle,
+        info: rocsparse_mat_info,
+        tolerance: f64,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup precond_module\n  \\brief Incomplete Cholesky factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csric0_get_tolerance() returns the numerical tolerance for detecting a\n  numerical singular pivot (where \\f$|L_{j,j}|  \\leq \\text{tolerance}\\f$)\n  that might be found during rocsparse_scsric0() or\n  rocsparse_dcsric0() computation.\n\n\n  \\note \\p rocsparse_csric0_get_tolerance() is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  tolerance    obtain tolerance for detecting singular pivot (\\f$|L_{j,j}|  \\leq \\text{tolerance}\\f$)\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer if \\p info or \\p tolerance pointer is\n              invalid"]
+    pub fn rocsparse_csric0_get_tolerance(
+        handle: rocsparse_handle,
+        info: rocsparse_mat_info,
+        tolerance: *mut f64,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup precond_module\n  \\brief Incomplete Cholesky factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csric0_buffer_size returns the size of the temporary storage buffer\n  that is required by rocsparse_scsric0_analysis(), rocsparse_dcsric0_analysis(),\n  rocsparse_scsric0() and rocsparse_dcsric0(). The temporary storage buffer must\n  be allocated by the user. The size of the temporary storage buffer is identical to\n  the size returned by rocsparse_scsrsv_buffer_size(), rocsparse_dcsrsv_buffer_size(),\n  rocsparse_scsrilu0_buffer_size() and rocsparse_dcsrilu0_buffer_size() if the matrix\n  sparsity pattern is identical. The user allocated buffer can thus be shared between\n  subsequent calls to those functions.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scsric0_analysis(), rocsparse_dcsric0_analysis(),\n              rocsparse_scsric0() and rocsparse_dcsric0().\n\n  
\\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p info or \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n/\n/**@{"]
@@ -6937,7 +9148,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Incomplete Cholesky factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csric0 computes the incomplete Cholesky factorization with 0 fill-ins\n  and no pivoting of a sparse \\f$m \\times m\\f$ CSR matrix \\f$A\\f$, such that\n  \\f[\n    A \\approx LL^T\n  \\f]\n\n  \\p rocsparse_csric0 requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_scsric0_buffer_size() or rocsparse_dcsric0_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_scsric0_analysis()\n  or rocsparse_dcsric0_analysis(). \\p rocsparse_csric0 reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be obtained by\n  calling rocsparse_csric0_zero_pivot().\n\n  \\note\n  The sparse CSR matrix has to be sorted. This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[inout]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  
\\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr\n              or \\p csr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the sparse \\f$m \\times m\\f$ matrix \\f$A\\f$, stored in CSR\n  storage format. The following example computes the incomplete Cholesky factorization\n  \\f$M \\approx LL^T\\f$ and solves the preconditioned system \\f$My = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor for M\n      rocsparse_mat_descr descr_M;\n      rocsparse_create_mat_descr(&descr_M);\n\n      // Create matrix descriptor for L\n      rocsparse_mat_descr descr_L;\n      rocsparse_create_mat_descr(&descr_L);\n      rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit);\n\n      // Create matrix descriptor for L'\n      rocsparse_mat_descr descr_Lt;\n      rocsparse_create_mat_descr(&descr_Lt);\n      rocsparse_set_mat_fill_mode(descr_Lt, rocsparse_fill_mode_upper);\n      rocsparse_set_mat_diag_type(descr_Lt, rocsparse_diag_type_non_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size_M;\n      size_t buffer_size_L;\n      
size_t buffer_size_Lt;\n      rocsparse_dcsric0_buffer_size(handle,\n                                    m,\n                                    nnz,\n                                    descr_M,\n                                    csr_val,\n                                    csr_row_ptr,\n                                    csr_col_ind,\n                                    info,\n                                    &buffer_size_M);\n      rocsparse_dcsrsv_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nnz,\n                                   descr_L,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n                                   &buffer_size_L);\n      rocsparse_dcsrsv_buffer_size(handle,\n                                   rocsparse_operation_transpose,\n                                   m,\n                                   nnz,\n                                   descr_Lt,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n                                   &buffer_size_Lt);\n\n      size_t buffer_size = max(buffer_size_M, max(buffer_size_L, buffer_size_Lt));\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis steps, using rocsparse_analysis_policy_reuse to improve\n      // computation performance\n      rocsparse_dcsric0_analysis(handle,\n                                 m,\n                                 nnz,\n                                 descr_M,\n                                 csr_val,\n                                 csr_row_ptr,\n                                 csr_col_ind,\n   
                              info,\n                                 rocsparse_analysis_policy_reuse,\n                                 rocsparse_solve_policy_auto,\n                                 temp_buffer);\n      rocsparse_dcsrsv_analysis(handle,\n                                rocsparse_operation_none,\n                                m,\n                                nnz,\n                                descr_L,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n      rocsparse_dcsrsv_analysis(handle,\n                                rocsparse_operation_transpose,\n                                m,\n                                nnz,\n                                descr_Lt,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Check for zero pivot\n      rocsparse_int position;\n      if(rocsparse_status_zero_pivot == rocsparse_csric0_zero_pivot(handle,\n                                                                    info,\n                                                                    &position))\n      {\n          printf(\"A has structural zero at A(%d,%d)\\n\", position, position);\n      }\n\n      // Compute incomplete Cholesky factorization M = LL'\n      rocsparse_dcsric0(handle,\n                        m,\n                        nnz,\n                        descr_M,\n                        csr_val,\n                        
csr_row_ptr,\n                        csr_col_ind,\n                        info,\n                        rocsparse_solve_policy_auto,\n                        temp_buffer);\n\n      // Check for zero pivot\n      if(rocsparse_status_zero_pivot == rocsparse_csric0_zero_pivot(handle,\n                                                                    info,\n                                                                    &position))\n      {\n          printf(\"L has structural and/or numerical zero at L(%d,%d)\\n\",\n                 position,\n                 position);\n      }\n\n      // Solve Lz = x\n      rocsparse_dcsrsv_solve(handle,\n                             rocsparse_operation_none,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr_L,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             x,\n                             z,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Solve L'y = z\n      rocsparse_dcsrsv_solve(handle,\n                             rocsparse_operation_transpose,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr_Lt,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             z,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr_M);\n      rocsparse_destroy_mat_descr(descr_L);\n      
rocsparse_destroy_mat_descr(descr_Lt);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
+    #[doc = " \\ingroup precond_module\n  \\brief Incomplete Cholesky factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csric0 computes the incomplete Cholesky factorization with 0 fill-ins\n  and no pivoting of a sparse \\f$m \\times m\\f$ CSR matrix \\f$A\\f$, such that\n  \\f[\n    A \\approx LL^T\n  \\f]\n\n  \\p rocsparse_csric0 requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_scsric0_buffer_size() or rocsparse_dcsric0_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_scsric0_analysis()\n  or rocsparse_dcsric0_analysis(). \\p rocsparse_csric0 reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be obtained by\n  calling rocsparse_csric0_zero_pivot().\n\n  \\note\n  The sparse CSR matrix has to be sorted. This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[inout]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  
@param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr\n              or \\p csr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the sparse \\f$m \\times m\\f$ matrix \\f$A\\f$, stored in CSR\n  storage format. The following example computes the incomplete Cholesky factorization\n  \\f$M \\approx LL^T\\f$ and solves the preconditioned system \\f$My = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor for M\n      rocsparse_mat_descr descr_M;\n      rocsparse_create_mat_descr(&descr_M);\n\n      // Create matrix descriptor for L\n      rocsparse_mat_descr descr_L;\n      rocsparse_create_mat_descr(&descr_L);\n      rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit);\n\n      // Create matrix descriptor for L'\n      rocsparse_mat_descr descr_Lt;\n      rocsparse_create_mat_descr(&descr_Lt);\n      rocsparse_set_mat_fill_mode(descr_Lt, rocsparse_fill_mode_upper);\n      rocsparse_set_mat_diag_type(descr_Lt, rocsparse_diag_type_non_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required 
buffer size\n      size_t buffer_size_M;\n      size_t buffer_size_L;\n      size_t buffer_size_Lt;\n      rocsparse_dcsric0_buffer_size(handle,\n                                    m,\n                                    nnz,\n                                    descr_M,\n                                    csr_val,\n                                    csr_row_ptr,\n                                    csr_col_ind,\n                                    info,\n                                    &buffer_size_M);\n      rocsparse_dcsrsv_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nnz,\n                                   descr_L,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n                                   &buffer_size_L);\n      rocsparse_dcsrsv_buffer_size(handle,\n                                   rocsparse_operation_transpose,\n                                   m,\n                                   nnz,\n                                   descr_Lt,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n                                   &buffer_size_Lt);\n\n      size_t buffer_size = max(buffer_size_M, max(buffer_size_L, buffer_size_Lt));\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis steps, using rocsparse_analysis_policy_reuse to improve\n      // computation performance\n      rocsparse_dcsric0_analysis(handle,\n                                 m,\n                                 nnz,\n                                 descr_M,\n                                 csr_val,\n                    
             csr_row_ptr,\n                                 csr_col_ind,\n                                 info,\n                                 rocsparse_analysis_policy_reuse,\n                                 rocsparse_solve_policy_auto,\n                                 temp_buffer);\n      rocsparse_dcsrsv_analysis(handle,\n                                rocsparse_operation_none,\n                                m,\n                                nnz,\n                                descr_L,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n      rocsparse_dcsrsv_analysis(handle,\n                                rocsparse_operation_transpose,\n                                m,\n                                nnz,\n                                descr_Lt,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Check for zero pivot\n      rocsparse_int position;\n      if(rocsparse_status_zero_pivot == rocsparse_csric0_zero_pivot(handle,\n                                                                    info,\n                                                                    &position))\n      {\n          printf(\"A has structural zero at A(%d,%d)\\n\", position, position);\n      }\n\n      // Compute incomplete Cholesky factorization M = LL'\n      rocsparse_dcsric0(handle,\n                        m,\n                        nnz,\n                        
descr_M,\n                        csr_val,\n                        csr_row_ptr,\n                        csr_col_ind,\n                        info,\n                        rocsparse_solve_policy_auto,\n                        temp_buffer);\n\n      // Check for zero pivot\n      if(rocsparse_status_zero_pivot == rocsparse_csric0_zero_pivot(handle,\n                                                                    info,\n                                                                    &position))\n      {\n          printf(\"L has structural and/or numerical zero at L(%d,%d)\\n\",\n                 position,\n                 position);\n      }\n\n      // Solve Lz = x\n      rocsparse_dcsrsv_solve(handle,\n                             rocsparse_operation_none,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr_L,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             x,\n                             z,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Solve L'y = z\n      rocsparse_dcsrsv_solve(handle,\n                             rocsparse_operation_transpose,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr_Lt,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             z,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr_M);\n      
rocsparse_destroy_mat_descr(descr_L);\n      rocsparse_destroy_mat_descr(descr_Lt);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
     pub fn rocsparse_scsric0(
         handle: rocsparse_handle,
         m: rocsparse_int,
@@ -7005,6 +9216,33 @@ extern "C" {
         position: *mut rocsparse_int,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup precond_module\n  \\brief Incomplete LU factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csrilu0_set_tolerance() sets the numerical tolerance for detecting a\n  near numerical zero entry during rocsparse_scsrilu0(),\n  rocsparse_dcsrilu0(), rocsparse_ccsrilu0() or rocsparse_zcsrilu0() computation. A\n  pivot \\f$j\\f$ is considered singular when \\f$|A_{j,j}| \\leq \\text{tolerance}\\f$.\n\n\n  \\note \\p rocsparse_csrilu0_set_tolerance() is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  tolerance    tolerance value to determine singular pivot \\f$|A_{j,j}| \\leq \\text{tolerance}\\f$,\n               where variable tolerance is in host memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_csrilu0_set_tolerance(
+        handle: rocsparse_handle,
+        info: rocsparse_mat_info,
+        tolerance: f64,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup precond_module\n  \\brief Incomplete LU factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csrilu0_get_tolerance() returns the numerical tolerance for detecting\n  a near numerical zero entry during rocsparse_scsrilu0(),\n  rocsparse_dcsrilu0(), rocsparse_ccsrilu0() or rocsparse_zcsrilu0() computation. A\n  pivot \\f$j\\f$ is considered singular when \\f$|A_{j,j}| \\leq \\text{tolerance}\\f$.\n\n\n  \\note \\p rocsparse_csrilu0_get_tolerance() is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[out]\n  tolerance   the tolerance value used to determine a singular pivot \\f$|A_{j,j}| \\leq \\text{tolerance}\\f$,\n              where variable tolerance is in host memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p tolerance pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_csrilu0_get_tolerance(
+        handle: rocsparse_handle,
+        info: rocsparse_mat_info,
+        tolerance: *mut f64,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup precond_module\n  \\brief Incomplete LU factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csrilu0_singular_pivot() returns the position of a\n  near numerical zero entry that has been found during rocsparse_scsrilu0(),\n  rocsparse_dcsrilu0(), rocsparse_ccsrilu0() or rocsparse_zcsrilu0() computation. The\n  first singular pivot \\f$j\\f$ with \\f$|A_{j,j}| \\leq \\text{tolerance}\\f$ is stored in \\p position,\n  using the same index base as the CSR matrix.\n\n  \\p position can be in host or device memory. If no singular pivot has been found,\n  \\p position is set to -1.\n\n  \\note \\p rocsparse_csrilu0_singular_pivot() is a blocking function. It might influence\n  performance negatively.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[inout]\n  position    pointer to singular pivot \\f$j\\f$, can be in host or device memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info or \\p position pointer is\n              invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
+    pub fn rocsparse_csrilu0_singular_pivot(
+        handle: rocsparse_handle,
+        info: rocsparse_mat_info,
+        position: *mut rocsparse_int,
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup precond_module\n  \\brief Incomplete LU factorization with 0 fill-ins and no pivoting using CSR storage\n  format\n\n  \\details\n  \\p rocsparse_csrilu0_numeric_boost enables the user to replace a numerical value in\n  an incomplete LU factorization. \\p tol is used to determine whether a numerical value\n  is replaced by \\p boost_val, such that \\f$A_{j,j} = \\text{boost_val}\\f$ if\n  \\f$\\text{tol} \\ge \\left|A_{j,j}\\right|\\f$.\n\n  \\note The boost value is enabled by setting \\p enable_boost to 1 or disabled by\n  setting \\p enable_boost to 0.\n\n  \\note \\p tol and \\p boost_val can be in host or device memory.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  info            structure that holds the information collected during the analysis step.\n  @param[in]\n  enable_boost    enable/disable numeric boost.\n  @param[in]\n  boost_tol       tolerance to determine whether a numerical value is replaced or not.\n  @param[in]\n  boost_val       boost value to replace a numerical value.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p info, \\p tol or \\p boost_val pointer\n              is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
@@ -7198,7 +9436,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Incomplete LU factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csrilu0 computes the incomplete LU factorization with 0 fill-ins and no\n  pivoting of a sparse \\f$m \\times m\\f$ CSR matrix \\f$A\\f$, such that\n  \\f[\n    A \\approx LU\n  \\f]\n\n  \\p rocsparse_csrilu0 requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_scsrilu0_buffer_size(), rocsparse_dcsrilu0_buffer_size(),\n  rocsparse_ccsrilu0_buffer_size() or rocsparse_zcsrilu0_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_scsrilu0_analysis(),\n  rocsparse_dcsrilu0_analysis(), rocsparse_ccsrilu0_analysis() or\n  rocsparse_zcsrilu0_analysis(). \\p rocsparse_csrilu0 reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be obtained by\n  calling rocsparse_csrilu0_zero_pivot().\n\n  \\note\n  The sparse CSR matrix has to be sorted. 
This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[inout]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr\n              or \\p csr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the sparse \\f$m \\times m\\f$ matrix \\f$A\\f$, stored in CSR\n  storage format. 
The following example computes the incomplete LU factorization\n  \\f$M \\approx LU\\f$ and solves the preconditioned system \\f$My = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor for M\n      rocsparse_mat_descr descr_M;\n      rocsparse_create_mat_descr(&descr_M);\n\n      // Create matrix descriptor for L\n      rocsparse_mat_descr descr_L;\n      rocsparse_create_mat_descr(&descr_L);\n      rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit);\n\n      // Create matrix descriptor for U\n      rocsparse_mat_descr descr_U;\n      rocsparse_create_mat_descr(&descr_U);\n      rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper);\n      rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size_M;\n      size_t buffer_size_L;\n      size_t buffer_size_U;\n      rocsparse_dcsrilu0_buffer_size(handle,\n                                    m,\n                                    nnz,\n                                    descr_M,\n                                    csr_val,\n                                    csr_row_ptr,\n                                    csr_col_ind,\n                                    info,\n                                    &buffer_size_M);\n      rocsparse_dcsrsv_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nnz,\n                                   descr_L,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n 
                                  &buffer_size_L);\n      rocsparse_dcsrsv_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nnz,\n                                   descr_U,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n                                   &buffer_size_U);\n\n      size_t buffer_size = max(buffer_size_M, max(buffer_size_L, buffer_size_U));\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis steps, using rocsparse_analysis_policy_reuse to improve\n      // computation performance\n      rocsparse_dcsrilu0_analysis(handle,\n                                  m,\n                                  nnz,\n                                  descr_M,\n                                  csr_val,\n                                  csr_row_ptr,\n                                  csr_col_ind,\n                                  info,\n                                  rocsparse_analysis_policy_reuse,\n                                  rocsparse_solve_policy_auto,\n                                  temp_buffer);\n      rocsparse_dcsrsv_analysis(handle,\n                                rocsparse_operation_none,\n                                m,\n                                nnz,\n                                descr_L,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n      rocsparse_dcsrsv_analysis(handle,\n                                
rocsparse_operation_none,\n                                m,\n                                nnz,\n                                descr_U,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Check for zero pivot\n      rocsparse_int position;\n      if(rocsparse_status_zero_pivot == rocsparse_csrilu0_zero_pivot(handle,\n                                                                     info,\n                                                                     &position))\n      {\n          printf(\"A has structural zero at A(%d,%d)\\n\", position, position);\n      }\n\n      // Compute incomplete LU factorization\n      rocsparse_dcsrilu0(handle,\n                         m,\n                         nnz,\n                         descr_M,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind,\n                         info,\n                         rocsparse_solve_policy_auto,\n                         temp_buffer);\n\n      // Check for zero pivot\n      if(rocsparse_status_zero_pivot == rocsparse_csrilu0_zero_pivot(handle,\n                                                                     info,\n                                                                     &position))\n      {\n          printf(\"U has structural and/or numerical zero at U(%d,%d)\\n\",\n                 position,\n                 position);\n      }\n\n      // Solve Lz = x\n      rocsparse_dcsrsv_solve(handle,\n                             rocsparse_operation_none,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr_L,\n        
                     csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             x,\n                             z,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Solve Uy = z\n      rocsparse_dcsrsv_solve(handle,\n                             rocsparse_operation_none,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr_U,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             z,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr_M);\n      rocsparse_destroy_mat_descr(descr_L);\n      rocsparse_destroy_mat_descr(descr_U);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
+    #[doc = " \\ingroup precond_module\n  \\brief Incomplete LU factorization with 0 fill-ins and no pivoting using CSR\n  storage format\n\n  \\details\n  \\p rocsparse_csrilu0 computes the incomplete LU factorization with 0 fill-ins and no\n  pivoting of a sparse \\f$m \\times m\\f$ CSR matrix \\f$A\\f$, such that\n  \\f[\n    A \\approx LU\n  \\f]\n\n  \\p rocsparse_csrilu0 requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_scsrilu0_buffer_size(), rocsparse_dcsrilu0_buffer_size(),\n  rocsparse_ccsrilu0_buffer_size() or rocsparse_zcsrilu0_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_scsrilu0_analysis(),\n  rocsparse_dcsrilu0_analysis(), rocsparse_ccsrilu0_analysis() or\n  rocsparse_zcsrilu0_analysis(). \\p rocsparse_csrilu0 reports the first zero pivot\n  (either numerical or structural zero). The zero pivot status can be obtained by\n  calling rocsparse_csrilu0_zero_pivot().\n\n  \\note\n  The sparse CSR matrix has to be sorted. 
This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr       descriptor of the sparse CSR matrix.\n  @param[inout]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  info        structure that holds the information collected during the analysis step.\n  @param[in]\n  policy      \\ref rocsparse_solve_policy_auto.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr\n              or \\p csr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\p trans != \\ref rocsparse_operation_none or\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  Consider the sparse \\f$m \\times m\\f$ matrix \\f$A\\f$, stored in CSR\n  
storage format. The following example computes the incomplete LU factorization\n  \\f$M \\approx LU\\f$ and solves the preconditioned system \\f$My = x\\f$.\n  \\code{.c}\n      // Create rocSPARSE handle\n      rocsparse_handle handle;\n      rocsparse_create_handle(&handle);\n\n      // Create matrix descriptor for M\n      rocsparse_mat_descr descr_M;\n      rocsparse_create_mat_descr(&descr_M);\n\n      // Create matrix descriptor for L\n      rocsparse_mat_descr descr_L;\n      rocsparse_create_mat_descr(&descr_L);\n      rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower);\n      rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit);\n\n      // Create matrix descriptor for U\n      rocsparse_mat_descr descr_U;\n      rocsparse_create_mat_descr(&descr_U);\n      rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper);\n      rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit);\n\n      // Create matrix info structure\n      rocsparse_mat_info info;\n      rocsparse_create_mat_info(&info);\n\n      // Obtain required buffer size\n      size_t buffer_size_M;\n      size_t buffer_size_L;\n      size_t buffer_size_U;\n      rocsparse_dcsrilu0_buffer_size(handle,\n                                    m,\n                                    nnz,\n                                    descr_M,\n                                    csr_val,\n                                    csr_row_ptr,\n                                    csr_col_ind,\n                                    info,\n                                    &buffer_size_M);\n      rocsparse_dcsrsv_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nnz,\n                                   descr_L,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                           
        info,\n                                   &buffer_size_L);\n      rocsparse_dcsrsv_buffer_size(handle,\n                                   rocsparse_operation_none,\n                                   m,\n                                   nnz,\n                                   descr_U,\n                                   csr_val,\n                                   csr_row_ptr,\n                                   csr_col_ind,\n                                   info,\n                                   &buffer_size_U);\n\n      size_t buffer_size = max(buffer_size_M, max(buffer_size_L, buffer_size_U));\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Perform analysis steps, using rocsparse_analysis_policy_reuse to improve\n      // computation performance\n      rocsparse_dcsrilu0_analysis(handle,\n                                  m,\n                                  nnz,\n                                  descr_M,\n                                  csr_val,\n                                  csr_row_ptr,\n                                  csr_col_ind,\n                                  info,\n                                  rocsparse_analysis_policy_reuse,\n                                  rocsparse_solve_policy_auto,\n                                  temp_buffer);\n      rocsparse_dcsrsv_analysis(handle,\n                                rocsparse_operation_none,\n                                m,\n                                nnz,\n                                descr_L,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n      rocsparse_dcsrsv_analysis(handle,\n                  
              rocsparse_operation_none,\n                                m,\n                                nnz,\n                                descr_U,\n                                csr_val,\n                                csr_row_ptr,\n                                csr_col_ind,\n                                info,\n                                rocsparse_analysis_policy_reuse,\n                                rocsparse_solve_policy_auto,\n                                temp_buffer);\n\n      // Check for zero pivot\n      rocsparse_int position;\n      if(rocsparse_status_zero_pivot == rocsparse_csrilu0_zero_pivot(handle,\n                                                                     info,\n                                                                     &position))\n      {\n          printf(\"A has structural zero at A(%d,%d)\\n\", position, position);\n      }\n\n      // Compute incomplete LU factorization\n      rocsparse_dcsrilu0(handle,\n                         m,\n                         nnz,\n                         descr_M,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind,\n                         info,\n                         rocsparse_solve_policy_auto,\n                         temp_buffer);\n\n      // Check for zero pivot\n      if(rocsparse_status_zero_pivot == rocsparse_csrilu0_zero_pivot(handle,\n                                                                     info,\n                                                                     &position))\n      {\n          printf(\"U has structural and/or numerical zero at U(%d,%d)\\n\",\n                 position,\n                 position);\n      }\n\n      // Solve Lz = x\n      rocsparse_dcsrsv_solve(handle,\n                             rocsparse_operation_none,\n                             m,\n                             nnz,\n                             &alpha,\n                             
descr_L,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             x,\n                             z,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Solve Uy = z\n      rocsparse_dcsrsv_solve(handle,\n                             rocsparse_operation_none,\n                             m,\n                             nnz,\n                             &alpha,\n                             descr_U,\n                             csr_val,\n                             csr_row_ptr,\n                             csr_col_ind,\n                             info,\n                             z,\n                             y,\n                             rocsparse_solve_policy_auto,\n                             temp_buffer);\n\n      // Clean up\n      hipFree(temp_buffer);\n      rocsparse_destroy_mat_info(info);\n      rocsparse_destroy_mat_descr(descr_M);\n      rocsparse_destroy_mat_descr(descr_L);\n      rocsparse_destroy_mat_descr(descr_U);\n      rocsparse_destroy_handle(handle);\n  \\endcode\n/\n/**@{"]
     pub fn rocsparse_scsrilu0(
         handle: rocsparse_handle,
         m: rocsparse_int,
@@ -7294,7 +9532,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Iterative Incomplete LU factorization with 0 fill-ins and no pivoting using CSR\n  storage format.\n\n  \\details\n  \\p rocsparse_csritilu0_compute computes iteratively the incomplete LU factorization with 0 fill-ins and no\n  pivoting of a sparse \\f$m \\times m\\f$ CSR matrix \\f$A\\f$, such that\n  \\f[\n    A \\approx LU\n  \\f]\n\n  \\p rocsparse_csritilu0 requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_csritilu0_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_csritlu0_preprocess().\n\n  \\note\n  The sparse CSR matrix has to be sorted. This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  alg         algorithm to use, \\ref rocsparse_itilu0_alg\n  @param[in]\n  option      combination of enumeration values from \\ref rocsparse_itilu0_option.\n  @param[inout]\n  nmaxiter     maximum number of iterations.\n  @param[in]\n  tol tolerance to use for stopping criteria.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[inout]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[out]\n  ilu0        incomplete factorization.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  buffer_size size of the storage buffer allocated by the user.\n  @param[in]\n  buffer      storage buffer allocated by the user.\n\n  \\retval     
rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_value \\p alg or \\p base is invalid.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr\n              or \\p csr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n/\n/**@{"]
+    #[doc = " \\ingroup precond_module\n  \\brief Iterative Incomplete LU factorization with 0 fill-ins and no pivoting using CSR\n  storage format.\n\n  \\details\n  \\p rocsparse_csritilu0_compute computes iteratively the incomplete LU factorization with 0 fill-ins and no\n  pivoting of a sparse \\f$m \\times m\\f$ CSR matrix \\f$A\\f$, such that\n  \\f[\n    A \\approx LU\n  \\f]\n\n  \\p rocsparse_csritilu0 requires a user allocated temporary buffer. Its size is returned\n  by rocsparse_csritilu0_buffer_size(). Furthermore,\n  analysis meta data is required. It can be obtained by rocsparse_csritlu0_preprocess().\n\n  \\note\n  The sparse CSR matrix has to be sorted. This can be achieved by calling\n  rocsparse_csrsort().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  alg         algorithm to use, \\ref rocsparse_itilu0_alg\n  @param[in]\n  option      combination of enumeration values from \\ref rocsparse_itilu0_option.\n  @param[inout]\n  nmaxiter     maximum number of iterations.\n  @param[in]\n  tol tolerance to use for stopping criteria.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start\n              of every row of the sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[inout]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[out]\n  ilu0        incomplete factorization.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  buffer_size size of the storage buffer allocated by the user.\n  @param[in]\n  
buffer      storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_value \\p alg or \\p base is invalid.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr\n              or \\p csr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n/\n/**@{"]
     pub fn rocsparse_scsritilu0_compute(
         handle: rocsparse_handle,
         alg: rocsparse_itilu0_alg,
@@ -7414,6 +9652,144 @@ extern "C" {
         buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup precond_module\n  \\brief Batched Pentadiagonal solver\n\n  \\details\n  \\p rocsparse_gpsv_interleaved_batch_buffer_size calculates the required buffer size\n  for rocsparse_gpsv_interleaved_batch(). It is the user's responsibility to allocate\n  this buffer.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  alg          algorithm to solve the linear system.\n  @param[in]\n  m            size of the pentadiagonal linear system.\n  @param[in]\n  ds           lower diagonal (distance 2) of pentadiagonal system. First two entries\n               must be zero.\n  @param[in]\n  dl           lower diagonal of pentadiagonal system. First entry must be zero.\n  @param[in]\n  d            main diagonal of pentadiagonal system.\n  @param[in]\n  du           upper diagonal of pentadiagonal system. Last entry must be zero.\n  @param[in]\n  dw           upper diagonal (distance 2) of pentadiagonal system. Last two entries\n               must be zero.\n  @param[in]\n  x            Dense array of right-hand-sides with dimension \\p batch_stride by \\p m.\n  @param[in]\n  batch_count  The number of systems to solve.\n  @param[in]\n  batch_stride The number of elements that separate consecutive elements in a system.\n               Must satisfy \\p batch_stride >= batch_count.\n  @param[out]\n  buffer_size  Number of bytes of the temporary storage buffer required.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p alg, \\p batch_count or\n              \\p batch_stride is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p ds, \\p dl, \\p d, \\p du, \\p dw, \\p x\n              or \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    pub fn rocsparse_sgpsv_interleaved_batch_buffer_size(
+        handle: rocsparse_handle,
+        alg: rocsparse_gpsv_interleaved_alg,
+        m: rocsparse_int,
+        ds: *const f32,
+        dl: *const f32,
+        d: *const f32,
+        du: *const f32,
+        dw: *const f32,
+        x: *const f32,
+        batch_count: rocsparse_int,
+        batch_stride: rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // `f64` variant of `rocsparse_sgpsv_interleaved_batch_buffer_size`; the parameter list matches it one-for-one.
+    #[must_use]
+    pub fn rocsparse_dgpsv_interleaved_batch_buffer_size(
+        handle: rocsparse_handle,
+        alg: rocsparse_gpsv_interleaved_alg,
+        m: rocsparse_int,
+        ds: *const f64,
+        dl: *const f64,
+        d: *const f64,
+        du: *const f64,
+        dw: *const f64,
+        x: *const f64,
+        batch_count: rocsparse_int,
+        batch_stride: rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // `rocsparse_float_complex` variant of `rocsparse_sgpsv_interleaved_batch_buffer_size`; the parameter list matches it one-for-one.
+    #[must_use]
+    pub fn rocsparse_cgpsv_interleaved_batch_buffer_size(
+        handle: rocsparse_handle,
+        alg: rocsparse_gpsv_interleaved_alg,
+        m: rocsparse_int,
+        ds: *const rocsparse_float_complex,
+        dl: *const rocsparse_float_complex,
+        d: *const rocsparse_float_complex,
+        du: *const rocsparse_float_complex,
+        dw: *const rocsparse_float_complex,
+        x: *const rocsparse_float_complex,
+        batch_count: rocsparse_int,
+        batch_stride: rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" { // `rocsparse_double_complex` variant of `rocsparse_sgpsv_interleaved_batch_buffer_size`; the parameter list matches it one-for-one.
+    #[must_use]
+    pub fn rocsparse_zgpsv_interleaved_batch_buffer_size(
+        handle: rocsparse_handle,
+        alg: rocsparse_gpsv_interleaved_alg,
+        m: rocsparse_int,
+        ds: *const rocsparse_double_complex,
+        dl: *const rocsparse_double_complex,
+        d: *const rocsparse_double_complex,
+        du: *const rocsparse_double_complex,
+        dw: *const rocsparse_double_complex,
+        x: *const rocsparse_double_complex,
+        batch_count: rocsparse_int,
+        batch_stride: rocsparse_int,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup precond_module\n  \\brief Batched Pentadiagonal solver\n\n  \\details\n  \\p rocsparse_gpsv_interleaved_batch  solves a batch of pentadiagonal linear systems.\n  The coefficient matrix of each pentadiagonal linear system is defined by five vectors\n  for the lower part (ds, dl), main diagonal (d) and upper part (du, dw).\n\n  The function requires a temporary buffer. The size of the required buffer is returned\n  by rocsparse_gpsv_interleaved_batch_buffer_size().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  The routine is numerically stable because it uses QR to solve the linear systems.\n\n  \\note\n  m need to be at least 3, to be a valid pentadiagonal matrix.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  alg          algorithm to solve the linear system.\n  @param[in]\n  m            size of the pentadiagonal linear system.\n  @param[inout]\n  ds           lower diagonal (distance 2) of pentadiagonal system. First two entries\n               must be zero.\n  @param[inout]\n  dl           lower diagonal of pentadiagonal system. First entry must be zero.\n  @param[inout]\n  d            main diagonal of pentadiagonal system.\n  @param[inout]\n  du           upper diagonal of pentadiagonal system. Last entry must be zero.\n  @param[inout]\n  dw           upper diagonal (distance 2) of pentadiagonal system. 
Last two entries\n               must be zero.\n  @param[inout]\n  x            Dense array of right-hand-sides with dimension \\p batch_stride by \\p m.\n  @param[in]\n  batch_count  The number of systems to solve.\n  @param[in]\n  batch_stride The number of elements that separate consecutive elements in a system.\n               Must satisfy \\p batch_stride >= batch_count.\n  @param[in]\n  temp_buffer  Temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p alg, \\p batch_count or\n              \\p batch_stride is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p ds, \\p dl, \\p d, \\p du, \\p dw, \\p x\n              or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    pub fn rocsparse_sgpsv_interleaved_batch( // f32 entry point: solves a batch of pentadiagonal linear systems
+        handle: rocsparse_handle, // handle to the rocsparse library context queue
+        alg: rocsparse_gpsv_interleaved_alg, // algorithm used to solve the linear system
+        m: rocsparse_int, // size of each pentadiagonal system (the doc above requires at least 3)
+        ds: *mut f32, // [inout] lower diagonal (distance 2); first two entries must be zero
+        dl: *mut f32, // [inout] lower diagonal; first entry must be zero
+        d: *mut f32, // [inout] main diagonal
+        du: *mut f32, // [inout] upper diagonal; last entry must be zero
+        dw: *mut f32, // [inout] upper diagonal (distance 2); last two entries must be zero
+        x: *mut f32, // [inout] dense array of right-hand sides, dimension batch_stride by m
+        batch_count: rocsparse_int, // number of systems to solve
+        batch_stride: rocsparse_int, // elements separating consecutive elements in a system; must be >= batch_count
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch; size comes from rocsparse_gpsv_interleaved_batch_buffer_size()
+    ) -> rocsparse_status;
+}
+extern "C" { // f64 sibling of rocsparse_sgpsv_interleaved_batch (batched pentadiagonal solver); carries no doc attribute of its own
+    #[must_use] // NOTE(review): parameter notes below mirror the doc on rocsparse_sgpsv_interleaved_batch, assuming the same contract — confirm
+    pub fn rocsparse_dgpsv_interleaved_batch(
+        handle: rocsparse_handle, // handle to the rocsparse library context queue
+        alg: rocsparse_gpsv_interleaved_alg, // algorithm used to solve the linear system
+        m: rocsparse_int, // size of each pentadiagonal system (documented minimum: 3)
+        ds: *mut f64, // [inout] lower diagonal (distance 2); first two entries must be zero
+        dl: *mut f64, // [inout] lower diagonal; first entry must be zero
+        d: *mut f64, // [inout] main diagonal
+        du: *mut f64, // [inout] upper diagonal; last entry must be zero
+        dw: *mut f64, // [inout] upper diagonal (distance 2); last two entries must be zero
+        x: *mut f64, // [inout] dense array of right-hand sides, dimension batch_stride by m
+        batch_count: rocsparse_int, // number of systems to solve
+        batch_stride: rocsparse_int, // elements separating consecutive elements in a system; must be >= batch_count
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch; presumably sized via rocsparse_dgpsv_interleaved_batch_buffer_size — confirm
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_float_complex sibling of rocsparse_sgpsv_interleaved_batch (batched pentadiagonal solver); carries no doc attribute of its own
+    #[must_use] // NOTE(review): parameter notes below mirror the doc on rocsparse_sgpsv_interleaved_batch, assuming the same contract — confirm
+    pub fn rocsparse_cgpsv_interleaved_batch(
+        handle: rocsparse_handle, // handle to the rocsparse library context queue
+        alg: rocsparse_gpsv_interleaved_alg, // algorithm used to solve the linear system
+        m: rocsparse_int, // size of each pentadiagonal system (documented minimum: 3)
+        ds: *mut rocsparse_float_complex, // [inout] lower diagonal (distance 2); first two entries must be zero
+        dl: *mut rocsparse_float_complex, // [inout] lower diagonal; first entry must be zero
+        d: *mut rocsparse_float_complex, // [inout] main diagonal
+        du: *mut rocsparse_float_complex, // [inout] upper diagonal; last entry must be zero
+        dw: *mut rocsparse_float_complex, // [inout] upper diagonal (distance 2); last two entries must be zero
+        x: *mut rocsparse_float_complex, // [inout] dense array of right-hand sides, dimension batch_stride by m
+        batch_count: rocsparse_int, // number of systems to solve
+        batch_stride: rocsparse_int, // elements separating consecutive elements in a system; must be >= batch_count
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch; presumably sized via rocsparse_cgpsv_interleaved_batch_buffer_size — confirm
+    ) -> rocsparse_status;
+}
+extern "C" { // rocsparse_double_complex sibling of rocsparse_sgpsv_interleaved_batch (batched pentadiagonal solver); carries no doc attribute of its own
+    #[must_use] // NOTE(review): parameter notes below mirror the doc on rocsparse_sgpsv_interleaved_batch, assuming the same contract — confirm
+    pub fn rocsparse_zgpsv_interleaved_batch(
+        handle: rocsparse_handle, // handle to the rocsparse library context queue
+        alg: rocsparse_gpsv_interleaved_alg, // algorithm used to solve the linear system
+        m: rocsparse_int, // size of each pentadiagonal system (documented minimum: 3)
+        ds: *mut rocsparse_double_complex, // [inout] lower diagonal (distance 2); first two entries must be zero
+        dl: *mut rocsparse_double_complex, // [inout] lower diagonal; first entry must be zero
+        d: *mut rocsparse_double_complex, // [inout] main diagonal
+        du: *mut rocsparse_double_complex, // [inout] upper diagonal; last entry must be zero
+        dw: *mut rocsparse_double_complex, // [inout] upper diagonal (distance 2); last two entries must be zero
+        x: *mut rocsparse_double_complex, // [inout] dense array of right-hand sides, dimension batch_stride by m
+        batch_count: rocsparse_int, // number of systems to solve
+        batch_stride: rocsparse_int, // elements separating consecutive elements in a system; must be >= batch_count
+        temp_buffer: *mut ::std::os::raw::c_void, // user-allocated scratch; presumably sized via rocsparse_zgpsv_interleaved_batch_buffer_size — confirm
+    ) -> rocsparse_status;
+}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup precond_module\n  \\brief Tridiagonal solver with pivoting\n\n  \\details\n  \\p rocsparse_gtsv_buffer_size returns the size of the temporary storage buffer\n  that is required by rocsparse_sgtsv(), rocsparse_dgtsv(),\n  rocsparse_cgtsv() and rocsparse_zgtsv(). The temporary storage buffer\n  must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           size of the tri-diagonal linear system (must be >= 2).\n  @param[in]\n  n           number of columns in the dense matrix B.\n  @param[in]\n  dl          lower diagonal of tri-diagonal system. First entry must be zero.\n  @param[in]\n  d           main diagonal of tri-diagonal system.\n  @param[in]\n  du          upper diagonal of tri-diagonal system. Last entry must be zero.\n  @param[in]\n  B           Dense matrix of size ( \\p ldb, \\p n ).\n  @param[in]\n  ldb         Leading dimension of B. Must satisfy \\p ldb >= max(1, m).\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_sgtsv(), rocsparse_dgtsv(), rocsparse_cgtsv()\n              and rocsparse_zgtsv().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ldb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p dl, \\p d, \\p du,\n              \\p B or \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
@@ -7473,7 +9849,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Tridiagonal solver with pivoting\n\n  \\details\n  \\p rocsparse_gtsv solves a tridiagonal system for multiple right hand sides using pivoting.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           size of the tri-diagonal linear system (must be >= 2).\n  @param[in]\n  n           number of columns in the dense matrix B.\n  @param[in]\n  dl          lower diagonal of tri-diagonal system. First entry must be zero.\n  @param[in]\n  d           main diagonal of tri-diagonal system.\n  @param[in]\n  du          upper diagonal of tri-diagonal system. Last entry must be zero.\n  @param[inout]\n  B           Dense matrix of size ( \\p ldb, \\p n ).\n  @param[in]\n  ldb         Leading dimension of B. Must satisfy \\p ldb >= max(1, m).\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ldb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p dl, \\p d,\n              \\p du, \\p B or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    #[doc = " \\ingroup precond_module\n  \\brief Tridiagonal solver with pivoting\n\n  \\details\n  \\p rocsparse_gtsv solves a tridiagonal system for multiple right hand sides using pivoting.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           size of the tri-diagonal linear system (must be >= 2).\n  @param[in]\n  n           number of columns in the dense matrix B.\n  @param[in]\n  dl          lower diagonal of tri-diagonal system. First entry must be zero.\n  @param[in]\n  d           main diagonal of tri-diagonal system.\n  @param[in]\n  du          upper diagonal of tri-diagonal system. Last entry must be zero.\n  @param[inout]\n  B           Dense matrix of size ( \\p ldb, \\p n ).\n  @param[in]\n  ldb         Leading dimension of B. Must satisfy \\p ldb >= max(1, m).\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ldb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p dl, \\p d,\n              \\p du, \\p B or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
     pub fn rocsparse_sgtsv(
         handle: rocsparse_handle,
         m: rocsparse_int,
@@ -7587,7 +9963,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Tridiagonal solver (no pivoting)\n\n  \\details\n  \\p rocsparse_gtsv_no_pivot  solves a tridiagonal linear system for multiple right-hand sides\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           size of the tri-diagonal linear system (must be >= 2).\n  @param[in]\n  n           number of columns in the dense matrix B.\n  @param[in]\n  dl          lower diagonal of tri-diagonal system. First entry must be zero.\n  @param[in]\n  d           main diagonal of tri-diagonal system.\n  @param[in]\n  du          upper diagonal of tri-diagonal system. Last entry must be zero.\n  @param[inout]\n  B           Dense matrix of size ( \\p ldb, \\p n ).\n  @param[in]\n  ldb         Leading dimension of B. Must satisfy \\p ldb >= max(1, m).\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ldb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p dl, \\p d,\n              \\p du, \\p B or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    #[doc = " \\ingroup precond_module\n  \\brief Tridiagonal solver (no pivoting)\n\n  \\details\n  \\p rocsparse_gtsv_no_pivot  solves a tridiagonal linear system for multiple right-hand sides\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           size of the tri-diagonal linear system (must be >= 2).\n  @param[in]\n  n           number of columns in the dense matrix B.\n  @param[in]\n  dl          lower diagonal of tri-diagonal system. First entry must be zero.\n  @param[in]\n  d           main diagonal of tri-diagonal system.\n  @param[in]\n  du          upper diagonal of tri-diagonal system. Last entry must be zero.\n  @param[inout]\n  B           Dense matrix of size ( \\p ldb, \\p n ).\n  @param[in]\n  ldb         Leading dimension of B. Must satisfy \\p ldb >= max(1, m).\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ldb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p dl, \\p d,\n              \\p du, \\p B or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
     pub fn rocsparse_sgtsv_no_pivot(
         handle: rocsparse_handle,
         m: rocsparse_int,
@@ -7701,7 +10077,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Strided Batch tridiagonal solver (no pivoting)\n\n  \\details\n  \\p rocsparse_gtsv_no_pivot_strided_batch  solves a batched tridiagonal linear system\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           size of the tri-diagonal linear system (must be >= 2).\n  @param[in]\n  dl          lower diagonal of tri-diagonal system. First entry must be zero.\n  @param[in]\n  d           main diagonal of tri-diagonal system.\n  @param[in]\n  du          upper diagonal of tri-diagonal system. Last entry must be zero.\n  @param[inout]\n  x           Dense array of righthand-sides where the ith righthand-side starts at \\p x+batch_stride*i.\n  @param[in]\n  batch_count The number of systems to solve.\n  @param[in]\n  batch_stride The number of elements that separate each system. Must satisfy \\p batch_stride >= m.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p batch_count or \\p batch_stride is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p dl, \\p d,\n              \\p du, \\p x or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    #[doc = " \\ingroup precond_module\n  \\brief Strided Batch tridiagonal solver (no pivoting)\n\n  \\details\n  \\p rocsparse_gtsv_no_pivot_strided_batch  solves a batched tridiagonal linear system\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           size of the tri-diagonal linear system (must be >= 2).\n  @param[in]\n  dl          lower diagonal of tri-diagonal system. First entry must be zero.\n  @param[in]\n  d           main diagonal of tri-diagonal system.\n  @param[in]\n  du          upper diagonal of tri-diagonal system. Last entry must be zero.\n  @param[inout]\n  x           Dense array of righthand-sides where the ith righthand-side starts at \\p x+batch_stride*i.\n  @param[in]\n  batch_count The number of systems to solve.\n  @param[in]\n  batch_stride The number of elements that separate each system. Must satisfy \\p batch_stride >= m.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p batch_count or \\p batch_stride is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p dl, \\p d,\n              \\p du, \\p x or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
     pub fn rocsparse_sgtsv_no_pivot_strided_batch(
         handle: rocsparse_handle,
         m: rocsparse_int,
@@ -7819,7 +10195,7 @@ extern "C" {
 }
 extern "C" {
     #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Interleaved Batch tridiagonal solver\n\n  \\details\n  \\p rocsparse_gtsv_interleaved_batch  solves a batched tridiagonal linear system. The routine requires a temporary storage\n  buffer that must be allocated by the user. The size of this buffer can be determined by first calling\n  \\p rocsparse_gtsv_interleaved_batch_buffer_size. The user can specify different algorithms for \\p rocsparse_gtsv_interleaved_batch\n  to use. Options are thomas ( \\p rocsparse_gtsv_interleaved_thomas ), LU ( \\p rocsparse_gtsv_interleaved_lu ),\n  or QR ( \\p rocsparse_gtsv_interleaved_qr ). Passing \\p rocsparse_gtsv_interleaved_default defaults the algorithm to use QR.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  alg         Algorithm to use when solving tridiagonal systems. Options are thomas ( \\p rocsparse_gtsv_interleaved_thomas ),\n              LU ( \\p rocsparse_gtsv_interleaved_lu ), or QR ( \\p rocsparse_gtsv_interleaved_qr ). Passing\n              \\p rocsparse_gtsv_interleaved_default defaults the algorithm to use QR. Thomas algorithm is the fastest but is not\n              stable while LU and QR are slower but are stable.\n  @param[in]\n  m           size of the tri-diagonal linear system.\n  @param[inout]\n  dl          lower diagonal of tri-diagonal system. The first element of the lower diagonal must be zero.\n  @param[inout]\n  d           main diagonal of tri-diagonal system.\n  @param[inout]\n  du          upper diagonal of tri-diagonal system. 
The last element of the upper diagonal must be zero.\n  @param[inout]\n  x           Dense array of righthand-sides with dimension \\p batch_stride by \\p m.\n  @param[in]\n  batch_count The number of systems to solve.\n  @param[in]\n  batch_stride The number of elements that separate consecutive elements in a system. Must satisfy \\p batch_stride >= batch_count.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p batch_count or \\p batch_stride is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p dl, \\p d,\n              \\p du, \\p x or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
+    #[doc = " \\ingroup precond_module\n  \\brief Interleaved Batch tridiagonal solver\n\n  \\details\n  \\p rocsparse_gtsv_interleaved_batch  solves a batched tridiagonal linear system. The routine requires a temporary storage\n  buffer that must be allocated by the user. The size of this buffer can be determined by first calling\n  \\p rocsparse_gtsv_interleaved_batch_buffer_size. The user can specify different algorithms for \\p rocsparse_gtsv_interleaved_batch\n  to use. Options are thomas ( \\p rocsparse_gtsv_interleaved_thomas ), LU ( \\p rocsparse_gtsv_interleaved_lu ),\n  or QR ( \\p rocsparse_gtsv_interleaved_qr ). Passing \\p rocsparse_gtsv_interleaved_default defaults the algorithm to use QR.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  alg         Algorithm to use when solving tridiagonal systems. Options are thomas ( \\p rocsparse_gtsv_interleaved_thomas ),\n              LU ( \\p rocsparse_gtsv_interleaved_lu ), or QR ( \\p rocsparse_gtsv_interleaved_qr ). Passing\n              \\p rocsparse_gtsv_interleaved_default defaults the algorithm to use QR. Thomas algorithm is the fastest but is not\n              stable while LU and QR are slower but are stable.\n  @param[in]\n  m           size of the tri-diagonal linear system.\n  @param[inout]\n  dl          lower diagonal of tri-diagonal system. The first element of the lower diagonal must be zero.\n  @param[inout]\n  d           main diagonal of tri-diagonal system.\n  @param[inout]\n  du          upper diagonal of tri-diagonal system. 
The last element of the upper diagonal must be zero.\n  @param[inout]\n  x           Dense array of righthand-sides with dimension \\p batch_stride by \\p m.\n  @param[in]\n  batch_count The number of systems to solve.\n  @param[in]\n  batch_stride The number of elements that separate consecutive elements in a system. Must satisfy \\p batch_stride >= batch_count.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p batch_count or \\p batch_stride is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p dl, \\p d,\n              \\p du, \\p x or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
     pub fn rocsparse_sgtsv_interleaved_batch(
         handle: rocsparse_handle,
         alg: rocsparse_gtsv_interleaved_alg,
@@ -7878,2688 +10254,6 @@ extern "C" {
         temp_buffer: *mut ::std::os::raw::c_void,
     ) -> rocsparse_status;
 }
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Batched Pentadiagonal solver\n\n  \\details\n  \\p rocsparse_gpsv_interleaved_batch_buffer_size calculates the required buffer size\n  for rocsparse_gpsv_interleaved_batch(). It is the users responsibility to allocate\n  this buffer.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  alg          algorithm to solve the linear system.\n  @param[in]\n  m            size of the pentadiagonal linear system.\n  @param[in]\n  ds           lower diagonal (distance 2) of pentadiagonal system. First two entries\n               must be zero.\n  @param[in]\n  dl           lower diagonal of pentadiagonal system. First entry must be zero.\n  @param[in]\n  d            main diagonal of pentadiagonal system.\n  @param[in]\n  du           upper diagonal of pentadiagonal system. Last entry must be zero.\n  @param[in]\n  dw           upper diagonal (distance 2) of pentadiagonal system. 
Last two entries\n               must be zero.\n  @param[in]\n  x            Dense array of right-hand-sides with dimension \\p batch_stride by \\p m.\n  @param[in]\n  batch_count  The number of systems to solve.\n  @param[in]\n  batch_stride The number of elements that separate consecutive elements in a system.\n               Must satisfy \\p batch_stride >= batch_count.\n  @param[out]\n  buffer_size  Number of bytes of the temporary storage buffer required.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p alg, \\p batch_count or\n              \\p batch_stride is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p ds, \\p dl, \\p d, \\p du, \\p dw, \\p x\n              or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
-    pub fn rocsparse_sgpsv_interleaved_batch_buffer_size(
-        handle: rocsparse_handle,
-        alg: rocsparse_gpsv_interleaved_alg,
-        m: rocsparse_int,
-        ds: *const f32,
-        dl: *const f32,
-        d: *const f32,
-        du: *const f32,
-        dw: *const f32,
-        x: *const f32,
-        batch_count: rocsparse_int,
-        batch_stride: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dgpsv_interleaved_batch_buffer_size(
-        handle: rocsparse_handle,
-        alg: rocsparse_gpsv_interleaved_alg,
-        m: rocsparse_int,
-        ds: *const f64,
-        dl: *const f64,
-        d: *const f64,
-        du: *const f64,
-        dw: *const f64,
-        x: *const f64,
-        batch_count: rocsparse_int,
-        batch_stride: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgpsv_interleaved_batch_buffer_size(
-        handle: rocsparse_handle,
-        alg: rocsparse_gpsv_interleaved_alg,
-        m: rocsparse_int,
-        ds: *const rocsparse_float_complex,
-        dl: *const rocsparse_float_complex,
-        d: *const rocsparse_float_complex,
-        du: *const rocsparse_float_complex,
-        dw: *const rocsparse_float_complex,
-        x: *const rocsparse_float_complex,
-        batch_count: rocsparse_int,
-        batch_stride: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgpsv_interleaved_batch_buffer_size(
-        handle: rocsparse_handle,
-        alg: rocsparse_gpsv_interleaved_alg,
-        m: rocsparse_int,
-        ds: *const rocsparse_double_complex,
-        dl: *const rocsparse_double_complex,
-        d: *const rocsparse_double_complex,
-        du: *const rocsparse_double_complex,
-        dw: *const rocsparse_double_complex,
-        x: *const rocsparse_double_complex,
-        batch_count: rocsparse_int,
-        batch_stride: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup precond_module\n  \\brief Batched Pentadiagonal solver\n\n  \\details\n  \\p rocsparse_gpsv_interleaved_batch  solves a batch of pentadiagonal linear systems.\n  The coefficient matrix of each pentadiagonal linear system is defined by five vectors\n  for the lower part (ds, dl), main diagonal (d) and upper part (du, dw).\n\n  The function requires a temporary buffer. The size of the required buffer is returned\n  by rocsparse_gpsv_interleaved_batch_buffer_size().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  The routine is numerically stable because it uses QR to solve the linear systems.\n\n  \\note\n  m need to be at least 3, to be a valid pentadiagonal matrix.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  alg          algorithm to solve the linear system.\n  @param[in]\n  m            size of the pentadiagonal linear system.\n  @param[inout]\n  ds           lower diagonal (distance 2) of pentadiagonal system. First two entries\n               must be zero.\n  @param[inout]\n  dl           lower diagonal of pentadiagonal system. First entry must be zero.\n  @param[inout]\n  d            main diagonal of pentadiagonal system.\n  @param[inout]\n  du           upper diagonal of pentadiagonal system. Last entry must be zero.\n  @param[inout]\n  dw           upper diagonal (distance 2) of pentadiagonal system. 
Last two entries\n               must be zero.\n  @param[inout]\n  x            Dense array of right-hand-sides with dimension \\p batch_stride by \\p m.\n  @param[in]\n  batch_count  The number of systems to solve.\n  @param[in]\n  batch_stride The number of elements that separate consecutive elements in a system.\n               Must satisfy \\p batch_stride >= batch_count.\n  @param[in]\n  temp_buffer  Temporary storage buffer allocated by the user.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p alg, \\p batch_count or\n              \\p batch_stride is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p ds, \\p dl, \\p d, \\p du, \\p dw, \\p x\n              or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
-    pub fn rocsparse_sgpsv_interleaved_batch(
-        handle: rocsparse_handle,
-        alg: rocsparse_gpsv_interleaved_alg,
-        m: rocsparse_int,
-        ds: *mut f32,
-        dl: *mut f32,
-        d: *mut f32,
-        du: *mut f32,
-        dw: *mut f32,
-        x: *mut f32,
-        batch_count: rocsparse_int,
-        batch_stride: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dgpsv_interleaved_batch(
-        handle: rocsparse_handle,
-        alg: rocsparse_gpsv_interleaved_alg,
-        m: rocsparse_int,
-        ds: *mut f64,
-        dl: *mut f64,
-        d: *mut f64,
-        du: *mut f64,
-        dw: *mut f64,
-        x: *mut f64,
-        batch_count: rocsparse_int,
-        batch_stride: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgpsv_interleaved_batch(
-        handle: rocsparse_handle,
-        alg: rocsparse_gpsv_interleaved_alg,
-        m: rocsparse_int,
-        ds: *mut rocsparse_float_complex,
-        dl: *mut rocsparse_float_complex,
-        d: *mut rocsparse_float_complex,
-        du: *mut rocsparse_float_complex,
-        dw: *mut rocsparse_float_complex,
-        x: *mut rocsparse_float_complex,
-        batch_count: rocsparse_int,
-        batch_stride: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgpsv_interleaved_batch(
-        handle: rocsparse_handle,
-        alg: rocsparse_gpsv_interleaved_alg,
-        m: rocsparse_int,
-        ds: *mut rocsparse_double_complex,
-        dl: *mut rocsparse_double_complex,
-        d: *mut rocsparse_double_complex,
-        du: *mut rocsparse_double_complex,
-        dw: *mut rocsparse_double_complex,
-        x: *mut rocsparse_double_complex,
-        batch_count: rocsparse_int,
-        batch_stride: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the number of nonzero elements per row or column and the total number of nonzero elements in a dense matrix.\n  \\details\n  The routine does support asynchronous execution if the pointer mode is set to device.\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir        direction that specified whether to count nonzero elements by \\ref rocsparse_direction_row or by \\ref rocsparse_direction_row.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld         leading dimension of dense array \\p A.\n\n  @param[out]\n  nnz_per_row_columns\n              array of size \\p m or \\p n containing the number of nonzero elements per row or column, respectively.\n  @param[out]\n  nnz_total_dev_host_ptr\n              total number of nonzero elements in device or host memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p nnz_per_row_columns or \\p nnz_total_dev_host_ptr\n              pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_snnz(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const f32,
-        ld: rocsparse_int,
-        nnz_per_row_columns: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dnnz(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const f64,
-        ld: rocsparse_int,
-        nnz_per_row_columns: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cnnz(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const rocsparse_float_complex,
-        ld: rocsparse_int,
-        nnz_per_row_columns: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_znnz(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const rocsparse_double_complex,
-        ld: rocsparse_int,
-        nnz_per_row_columns: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the matrix A in dense format into a sparse matrix in CSR format.\n  All the parameters are assumed to have been pre-allocated by the user and the arrays are filled in based on nnz_per_row, which can be pre-computed with rocsparse_xnnz().\n\n  \\note\n  This function is blocking with respect to the host.\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld         leading dimension of dense array \\p A.\n\n  @param[in]\n  nnz_per_rows   array of size \\p n containing the number of non-zero elements per row.\n\n  @param[out]\n  csr_val\n              array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[out]\n  csr_row_ptr\n              integer array of m+1 elements that contains the start of every row and the end of the last row plus one.\n  @param[out]\n  csr_col_ind\n              integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p nnz_per_rows or \\p csr_val \\p csr_row_ptr or \\p csr_col_ind\n              pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sdense2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const f32,
-        ld: rocsparse_int,
-        nnz_per_rows: *const rocsparse_int,
-        csr_val: *mut f32,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ddense2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const f64,
-        ld: rocsparse_int,
-        nnz_per_rows: *const rocsparse_int,
-        csr_val: *mut f64,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cdense2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const rocsparse_float_complex,
-        ld: rocsparse_int,
-        nnz_per_rows: *const rocsparse_int,
-        csr_val: *mut rocsparse_float_complex,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zdense2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const rocsparse_double_complex,
-        ld: rocsparse_int,
-        nnz_per_rows: *const rocsparse_int,
-        csr_val: *mut rocsparse_double_complex,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the the size of the user allocated temporary storage buffer used when converting and pruning\n  a dense matrix to a CSR matrix.\n\n  \\details\n  \\p rocsparse_prune_dense2csr_buffer_size returns the size of the temporary storage buffer\n  that is required by rocsparse_sprune_dense2csr_nnz(), rocsparse_dprune_dense2csr_nnz(),\n  rocsparse_sprune_dense2csr(), and rocsparse_dprune_dense2csr(). The temporary\n  storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  threshold   pointer to the pruning non-negative threshold which can exist in either host or device memory.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  csr_val\n              array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[in]\n  csr_row_ptr\n              integer array of \\p m+1 elements that contains the start of every row and the end of the last row plus one.\n  @param[in]\n  csr_col_ind\n              integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n          
    rocsparse_sprune_dense2csr_nnz(), rocsparse_dprune_dense2csr_nnz(),\n              rocsparse_sprune_dense2csr() and rocsparse_dprune_dense2csr().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
-    pub fn rocsparse_sprune_dense2csr_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f32,
-        lda: rocsparse_int,
-        threshold: *const f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_dense2csr_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f64,
-        lda: rocsparse_int,
-        threshold: *const f64,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the number of nonzero elements per row and the total number of nonzero elements in a dense matrix once\n  elements less than the threshold are pruned from the matrix.\n\n  \\details\n  The routine does support asynchronous execution if the pointer mode is set to device.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  threshold   pointer to the pruning non-negative threshold which can exist in either host or device memory.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A.\n\n  @param[out]\n  csr_row_ptr\n              integer array of \\p m+1 elements that contains the start of every row and the end of the last row plus one.\n  @param[out]\n  nnz_total_dev_host_ptr\n              total number of nonzero elements in device or host memory.\n\n  @param[out]\n  temp_buffer\n              buffer allocated by the user whose size is determined by calling rocsparse_xprune_dense2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p lda is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p threshold or \\p descr or \\p csr_row_ptr\n              or \\p nnz_total_dev_host_ptr or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sprune_dense2csr_nnz(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f32,
-        lda: rocsparse_int,
-        threshold: *const f32,
-        descr: rocsparse_mat_descr,
-        csr_row_ptr: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_dense2csr_nnz(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f64,
-        lda: rocsparse_int,
-        threshold: *const f64,
-        descr: rocsparse_mat_descr,
-        csr_row_ptr: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the matrix A in dense format into a sparse matrix in CSR format while pruning values\n  that are less than the threshold. All the parameters are assumed to have been pre-allocated by the user.\n\n  \\details\n  The user first allocates \\p csr_row_ptr to have \\p m+1 elements and then calls rocsparse_xprune_dense2csr_nnz()\n  which fills in the \\p csr_row_ptr array and stores the number of elements that are larger than the pruning threshold\n  in \\p nnz_total_dev_host_ptr. The user then allocates \\p csr_col_ind and \\p csr_val to have size \\p nnz_total_dev_host_ptr\n  and completes the conversion by calling rocsparse_xprune_dense2csr(). A temporary storage buffer is used by both\n  rocsparse_xprune_dense2csr_nnz() and rocsparse_xprune_dense2csr() and must be allocated by the user and whose size is determined\n  by rocsparse_xprune_dense2csr_buffer_size().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  threshold   pointer to the non-negative pruning threshold which can exist in either host or device memory.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[out]\n  csr_val\n              array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[in]\n  csr_row_ptr\n              integer array of \\p m+1 elements that 
contains the start of every row and the end of the last row plus one.\n  @param[out]\n  csr_col_ind\n              integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user, size is returned by\n              rocsparse_xprune_dense2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p lda is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p descr or \\p threshold or \\p csr_val\n              or \\p csr_row_ptr or \\p csr_col_ind or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sprune_dense2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f32,
-        lda: rocsparse_int,
-        threshold: *const f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *mut f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_dense2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f64,
-        lda: rocsparse_int,
-        threshold: *const f64,
-        descr: rocsparse_mat_descr,
-        csr_val: *mut f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the size of the user allocated temporary storage buffer used when converting and pruning by percentage a\n  dense matrix to a CSR matrix.\n\n  \\details\n  When converting and pruning a dense matrix A to a CSR matrix by percentage the following steps are performed. First the user\n  calls \\p rocsparse_prune_dense2csr_by_percentage_buffer_size which determines the size of the temporary storage buffer. Once\n  determined, this buffer must be allocated by the user. Next the user allocates the csr_row_ptr array to have \\p m+1 elements\n  and calls \\p rocsparse_prune_dense2csr_nnz_by_percentage. Finally the user finishes the conversion by allocating the csr_col_ind\n  and csr_val arrays (whos size is determined by the value at nnz_total_dev_host_ptr) and calling \\p rocsparse_prune_dense2csr_by_percentage.\n\n  The pruning by percentage works by first sorting the absolute values of the dense matrix \\p A. We then determine a position in this\n  sorted array by\n  \\f[\n    pos = ceil(m*n*(percentage/100)) - 1\n    pos = min(pos, m*n-1)\n    pos = max(pos, 0)\n    threshold = sorted_A[pos]\n  \\f]\n  Once we have this threshold we prune values in the dense matrix \\p A as in \\p rocsparse_prune_dense2csr.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  percentage  percentage >= 0 and percentage <= 100.\n\n  @param[in]\n  descr      the 
descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  csr_val    array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n\n  @param[in]\n  csr_row_ptr integer array of \\p m+1 elements that contains the start of every row and the end of the last row plus one.\n\n  @param[in]\n  csr_col_ind integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[in]\n  info prune information structure\n\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_sprune_dense2csr_nnz_by_percentage(), rocsparse_dprune_dense2csr_nnz_by_percentage(),\n              rocsparse_sprune_dense2csr_by_percentage() and rocsparse_dprune_dense2csr_by_percentage().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
-    pub fn rocsparse_sprune_dense2csr_by_percentage_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f32,
-        lda: rocsparse_int,
-        percentage: f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_dense2csr_by_percentage_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f64,
-        lda: rocsparse_int,
-        percentage: f64,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the number of nonzero elements per row and the total number of nonzero elements in a dense matrix\n  when converting and pruning by percentage a dense matrix to a CSR matrix.\n\n  \\details\n  When converting and pruning a dense matrix A to a CSR matrix by percentage the following steps are performed. First the user\n  calls \\p rocsparse_prune_dense2csr_by_percentage_buffer_size which determines the size of the temporary storage buffer. Once\n  determined, this buffer must be allocated by the user. Next the user allocates the csr_row_ptr array to have \\p m+1 elements\n  and calls \\p rocsparse_prune_dense2csr_nnz_by_percentage. Finally the user finishes the conversion by allocating the csr_col_ind\n  and csr_val arrays (whos size is determined by the value at nnz_total_dev_host_ptr) and calling \\p rocsparse_prune_dense2csr_by_percentage.\n\n  The pruning by percentage works by first sorting the absolute values of the dense matrix \\p A. 
We then determine a position in this\n  sorted array by\n  \\f[\n    pos = ceil(m*n*(percentage/100)) - 1\n    pos = min(pos, m*n-1)\n    pos = max(pos, 0)\n    threshold = sorted_A[pos]\n  \\f]\n  Once we have this threshold we prune values in the dense matrix \\p A as in \\p rocsparse_prune_dense2csr.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  percentage  percentage >= 0 and percentage <= 100.\n\n  @param[in]\n  descr       the descriptor of the dense matrix \\p A.\n\n  @param[out]\n  csr_row_ptr integer array of \\p m+1 elements that contains the start of every row and the end of the last row plus one.\n\n  @param[out]\n  nnz_total_dev_host_ptr total number of nonzero elements in device or host memory.\n\n  @param[in]\n  info prune information structure\n\n  @param[out]\n  temp_buffer buffer allocated by the user whose size is determined by calling rocsparse_xprune_dense2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p lda or \\p percentage is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p descr or \\p info or \\p csr_row_ptr\n              or \\p nnz_total_dev_host_ptr or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sprune_dense2csr_nnz_by_percentage(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f32,
-        lda: rocsparse_int,
-        percentage: f32,
-        descr: rocsparse_mat_descr,
-        csr_row_ptr: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-        info: rocsparse_mat_info,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_dense2csr_nnz_by_percentage(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f64,
-        lda: rocsparse_int,
-        percentage: f64,
-        descr: rocsparse_mat_descr,
-        csr_row_ptr: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-        info: rocsparse_mat_info,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the matrix A in dense format into a sparse matrix in CSR format while pruning values\n  based on percentage.\n\n  \\details\n  When converting and pruning a dense matrix A to a CSR matrix by percentage the following steps are performed. First the user\n  calls \\p rocsparse_prune_dense2csr_by_percentage_buffer_size which determines the size of the temporary storage buffer. Once\n  determined, this buffer must be allocated by the user. Next the user allocates the csr_row_ptr array to have \\p m+1 elements\n  and calls \\p rocsparse_prune_dense2csr_nnz_by_percentage. Finally the user finishes the conversion by allocating the csr_col_ind\n  and csr_val arrays (whos size is determined by the value at nnz_total_dev_host_ptr) and calling \\p rocsparse_prune_dense2csr_by_percentage.\n\n  The pruning by percentage works by first sorting the absolute values of the dense matrix \\p A. We then determine a position in this\n  sorted array by\n  \\f[\n    pos = ceil(m*n*(percentage/100)) - 1\n    pos = min(pos, m*n-1)\n    pos = max(pos, 0)\n    threshold = sorted_A[pos]\n  \\f]\n  Once we have this threshold we prune values in the dense matrix \\p A as in \\p rocsparse_prune_dense2csr.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  A           array of dimensions (\\p lda, \\p n)\n\n  @param[in]\n  lda         leading dimension of dense array \\p A.\n\n  @param[in]\n  percentage  percentage >= 0 and percentage <= 100.\n\n  @param[in]\n  descr       the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid 
value of the \\ref rocsparse_index_base.\n\n  @param[out]\n  csr_val array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n\n  @param[in]\n  csr_row_ptr integer array of \\p m+1 elements that contains the start of every row and the end of the last row plus one.\n\n  @param[out]\n  csr_col_ind integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[in]\n  info prune information structure\n\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user, size is returned by\n              rocsparse_xprune_dense2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p lda or \\p percentage is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p descr or \\p info or \\p csr_val\n              or \\p csr_row_ptr or \\p csr_col_ind or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sprune_dense2csr_by_percentage(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f32,
-        lda: rocsparse_int,
-        percentage: f32,
-        descr: rocsparse_mat_descr,
-        csr_val: *mut f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-        info: rocsparse_mat_info,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_dense2csr_by_percentage(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        A: *const f64,
-        lda: rocsparse_int,
-        percentage: f64,
-        descr: rocsparse_mat_descr,
-        csr_val: *mut f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-        info: rocsparse_mat_info,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n\n  This function converts the matrix A in dense format into a sparse matrix in CSC format.\n  All the parameters are assumed to have been pre-allocated by the user and the arrays are\n  filled in based on nnz_per_columns, which can be pre-computed with rocsparse_xnnz().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld         leading dimension of dense array \\p A.\n\n  @param[in]\n  nnz_per_columns   array of size \\p n containing the number of non-zero elements per column.\n\n  @param[out]\n  csc_val\n              array of nnz ( = \\p csc_col_ptr[m] - \\p csc_col_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[out]\n  csc_col_ptr\n              integer array of m+1 elements that contains the start of every column and the end of the last column plus one.\n  @param[out]\n  csc_row_ind\n              integer array of nnz ( = \\p csc_col_ptr[m] - csc_col_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p nnz_per_columns or \\p csc_val \\p csc_col_ptr or \\p csc_row_ind\n              pointer is 
invalid.\n/\n/**@{"]
-    pub fn rocsparse_sdense2csc(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const f32,
-        ld: rocsparse_int,
-        nnz_per_columns: *const rocsparse_int,
-        csc_val: *mut f32,
-        csc_col_ptr: *mut rocsparse_int,
-        csc_row_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ddense2csc(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const f64,
-        ld: rocsparse_int,
-        nnz_per_columns: *const rocsparse_int,
-        csc_val: *mut f64,
-        csc_col_ptr: *mut rocsparse_int,
-        csc_row_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cdense2csc(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const rocsparse_float_complex,
-        ld: rocsparse_int,
-        nnz_per_columns: *const rocsparse_int,
-        csc_val: *mut rocsparse_float_complex,
-        csc_col_ptr: *mut rocsparse_int,
-        csc_row_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zdense2csc(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const rocsparse_double_complex,
-        ld: rocsparse_int,
-        nnz_per_columns: *const rocsparse_int,
-        csc_val: *mut rocsparse_double_complex,
-        csc_col_ptr: *mut rocsparse_int,
-        csc_row_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n\n  This function converts the matrix A in dense format into a sparse matrix in COO format.\n  All the parameters are assumed to have been pre-allocated by the user and the arrays are\n  filled in based on nnz_per_rows, which can be pre-computed with rocsparse_xnnz().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr      the descriptor of the dense matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[in]\n  ld         leading dimension of dense array \\p A.\n\n  @param[in]\n  nnz_per_rows   array of size \\p n containing the number of non-zero elements per row.\n\n  @param[out]\n  coo_val\n              array of nnz nonzero elements of matrix \\p A.\n  @param[out]\n  coo_row_ind\n              integer array of nnz row indices of the non-zero elements of matrix \\p A.\n  @param[out]\n  coo_col_ind integer array of nnz column indices of the non-zero elements of matrix \\p A.\n\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p nnz_per_rows or \\p coo_val \\p coo_col_ind or \\p coo_row_ind\n              pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sdense2coo(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const f32,
-        ld: rocsparse_int,
-        nnz_per_rows: *const rocsparse_int,
-        coo_val: *mut f32,
-        coo_row_ind: *mut rocsparse_int,
-        coo_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ddense2coo(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const f64,
-        ld: rocsparse_int,
-        nnz_per_rows: *const rocsparse_int,
-        coo_val: *mut f64,
-        coo_row_ind: *mut rocsparse_int,
-        coo_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cdense2coo(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const rocsparse_float_complex,
-        ld: rocsparse_int,
-        nnz_per_rows: *const rocsparse_int,
-        coo_val: *mut rocsparse_float_complex,
-        coo_row_ind: *mut rocsparse_int,
-        coo_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zdense2coo(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        A: *const rocsparse_double_complex,
-        ld: rocsparse_int,
-        nnz_per_rows: *const rocsparse_int,
-        coo_val: *mut rocsparse_double_complex,
-        coo_row_ind: *mut rocsparse_int,
-        coo_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the sparse matrix in CSR format into a dense matrix.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr       the descriptor of the dense matrix \\p A, the supported matrix type is \\ref rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  csr_val     array of nnz ( = \\p csr_row_ptr[m] - \\p csr_row_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[in]\n  csr_row_ptr integer array of m+1 elements that contains the start of every row and the end of the last row plus one.\n  @param[in]\n  csr_col_ind integer array of nnz ( = \\p csr_row_ptr[m] - csr_row_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[out]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[out]\n  ld          leading dimension of dense array \\p A.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p csr_val \\p csr_row_ptr or \\p csr_col_ind\n              pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scsr2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        A: *mut f32,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsr2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        A: *mut f64,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsr2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        A: *mut rocsparse_float_complex,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsr2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        A: *mut rocsparse_double_complex,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the sparse matrix in CSC format into a dense matrix.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  descr       the descriptor of the dense matrix \\p A, the supported matrix type is \\ref rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  csc_val     array of nnz ( = \\p csc_col_ptr[m] - \\p csc_col_ptr[0] ) nonzero elements of matrix \\p A.\n  @param[in]\n  csc_col_ptr integer array of m+1 elements that contains the start of every row and the end of the last row plus one.\n  @param[in]\n  csc_row_ind integer array of nnz ( = \\p csc_col_ptr[m] - csc_col_ptr[0] ) column indices of the non-zero elements of matrix \\p A.\n\n  @param[out]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[out]\n  ld          leading dimension of dense array \\p A.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p csc_val \\p csc_col_ptr or \\p csc_row_ind\n              pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scsc2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csc_val: *const f32,
-        csc_col_ptr: *const rocsparse_int,
-        csc_row_ind: *const rocsparse_int,
-        A: *mut f32,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsc2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csc_val: *const f64,
-        csc_col_ptr: *const rocsparse_int,
-        csc_row_ind: *const rocsparse_int,
-        A: *mut f64,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsc2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csc_val: *const rocsparse_float_complex,
-        csc_col_ptr: *const rocsparse_int,
-        csc_row_ind: *const rocsparse_int,
-        A: *mut rocsparse_float_complex,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsc2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csc_val: *const rocsparse_double_complex,
-        csc_col_ptr: *const rocsparse_int,
-        csc_row_ind: *const rocsparse_int,
-        A: *mut rocsparse_double_complex,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the sparse matrix in COO format into a dense matrix.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  m           number of rows of the dense matrix \\p A.\n\n  @param[in]\n  n           number of columns of the dense matrix \\p A.\n\n  @param[in]\n  nnz         number of non-zero entries of the sparse COO matrix.\n  @param[in]\n  descr       the descriptor of the dense matrix \\p A, the supported matrix type is \\ref rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  coo_val     array of nnz nonzero elements of matrix \\p A.\n  @param[in]\n  coo_row_ind integer array of nnz row indices of the non-zero elements of matrix \\p A.\n\n  @param[in]\n  coo_col_ind integer array of nnz column indices of the non-zero elements of matrix \\p A.\n  @param[out]\n  A           array of dimensions (\\p ld, \\p n)\n\n  @param[out]\n  ld          leading dimension of dense array \\p A.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p nnz or \\p ld is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p A or \\p coo_val \\p coo_col_ind or \\p coo_row_ind\n              pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scoo2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        coo_val: *const f32,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        A: *mut f32,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcoo2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        coo_val: *const f64,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        A: *mut f64,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccoo2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        coo_val: *const rocsparse_float_complex,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        A: *mut rocsparse_float_complex,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcoo2dense(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        coo_val: *const rocsparse_double_complex,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        A: *mut rocsparse_double_complex,
-        ld: rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  Given a sparse CSR matrix and a non-negative tolerance, this function computes how many entries would be left\n  in each row of the matrix if elements less than the tolerance were removed. It also computes the total number\n  of remaining elements in the matrix.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n\n  @param[in]\n  m             number of rows of the sparse CSR matrix.\n\n  @param[in]\n  descr_A       the descriptor of the sparse CSR matrix.\n\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                uncompressed sparse CSR matrix.\n  @param[out]\n  nnz_per_row   array of length \\p m containing the number of entries that will be kept per row in\n                the final compressed CSR matrix.\n  @param[out]\n  nnz_C         number of elements in the column indices and values arrays of the compressed\n                sparse CSR matrix. Can be either host or device pointer.\n  @param[in]\n  tol           the non-negative tolerance used for compression. If \\p tol is complex then only the magnitude\n                of the real part is used. 
Entries in the input uncompressed CSR array that are below the tolerance\n                are removed in output compressed CSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n is invalid.\n  \\retval     rocsparse_status_invalid_value \\p tol is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_val_A or \\p csr_row_ptr_A or \\p nnz_per_row or \\p nnz_C\n              pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_snnz_compress(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f32,
-        csr_row_ptr_A: *const rocsparse_int,
-        nnz_per_row: *mut rocsparse_int,
-        nnz_C: *mut rocsparse_int,
-        tol: f32,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dnnz_compress(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f64,
-        csr_row_ptr_A: *const rocsparse_int,
-        nnz_per_row: *mut rocsparse_int,
-        nnz_C: *mut rocsparse_int,
-        tol: f64,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cnnz_compress(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        csr_val_A: *const rocsparse_float_complex,
-        csr_row_ptr_A: *const rocsparse_int,
-        nnz_per_row: *mut rocsparse_int,
-        nnz_C: *mut rocsparse_int,
-        tol: rocsparse_float_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_znnz_compress(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        csr_val_A: *const rocsparse_double_complex,
-        csr_row_ptr_A: *const rocsparse_int,
-        nnz_per_row: *mut rocsparse_int,
-        nnz_C: *mut rocsparse_int,
-        tol: rocsparse_double_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse COO matrix\n\n  \\details\n  \\p rocsparse_csr2coo converts the CSR array containing the row offsets, that point\n  to the start of every row, into a COO array of row indices.\n\n  \\note\n  It can also be used to convert a CSC array containing the column offsets into a COO\n  array of column indices.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row\n              of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[out]\n  coo_row_ind array of \\p nnz elements containing the row indices of the sparse COO\n              matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr or \\p coo_row_ind\n              pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n\n  \\par Example\n  This example converts a CSR matrix into a COO matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 5;\n      rocsparse_int nnz = 8;\n\n      csr_row_ptr[m+1] = {0, 3, 5, 8};             // device memory\n      csr_col_ind[nnz] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      csr_val[nnz]     = {1, 
2, 3, 4, 5, 6, 7, 8}; // device memory\n\n      // Allocate COO matrix arrays\n      rocsparse_int* coo_row_ind;\n      rocsparse_int* coo_col_ind;\n      float* coo_val;\n\n      hipMalloc((void**)&coo_row_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&coo_col_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&coo_val, sizeof(float) * nnz);\n\n      // Convert the csr row offsets into coo row indices\n      rocsparse_csr2coo(handle,\n                        csr_row_ptr,\n                        nnz,\n                        m,\n                        coo_row_ind,\n                        rocsparse_index_base_zero);\n\n      // Copy the column and value arrays\n      hipMemcpy(coo_col_ind,\n                csr_col_ind,\n                sizeof(rocsparse_int) * nnz,\n                hipMemcpyDeviceToDevice);\n\n      hipMemcpy(coo_val,\n                csr_val,\n                sizeof(float) * nnz,\n                hipMemcpyDeviceToDevice);\n  \\endcode"]
-    pub fn rocsparse_csr2coo(
-        handle: rocsparse_handle,
-        csr_row_ptr: *const rocsparse_int,
-        nnz: rocsparse_int,
-        m: rocsparse_int,
-        coo_row_ind: *mut rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse CSC matrix\n\n  \\details\n  \\p rocsparse_csr2csc_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_scsr2csc(), rocsparse_dcsr2csc(), rocsparse_ccsr2csc() and\n  rocsparse_zcsr2csc(). The temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  copy_values \\ref rocsparse_action_symbolic or \\ref rocsparse_action_numeric.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scsr2csc(), rocsparse_dcsr2csc(), rocsparse_ccsr2csc() and\n              rocsparse_zcsr2csc().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr, \\p csr_col_ind or\n              \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
-    pub fn rocsparse_csr2csc_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        copy_values: rocsparse_action,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse CSC matrix\n\n  \\details\n  \\p rocsparse_csr2csc converts a CSR matrix into a CSC matrix. \\p rocsparse_csr2csc\n  can also be used to convert a CSC matrix into a CSR matrix. \\p copy_values decides\n  whether \\p csc_val is being filled during conversion (\\ref rocsparse_action_numeric)\n  or not (\\ref rocsparse_action_symbolic).\n\n  \\p rocsparse_csr2csc requires extra temporary storage buffer that has to be allocated\n  by the user. Storage buffer size can be determined by rocsparse_csr2csc_buffer_size().\n\n  \\note\n  The resulting matrix can also be seen as the transpose of the input matrix.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[out]\n  csc_val     array of \\p nnz elements of the sparse CSC matrix.\n  @param[out]\n  csc_row_ind array of \\p nnz elements containing the row indices of the sparse CSC\n              matrix.\n  @param[out]\n  csc_col_ptr array of \\p n+1 elements that point to the start of every column of the\n              sparse CSC matrix.\n  @param[in]\n  copy_values \\ref rocsparse_action_symbolic or \\ref rocsparse_action_numeric.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref 
rocsparse_index_base_one.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user, size is returned by\n              rocsparse_csr2csc_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_val, \\p csr_row_ptr,\n              \\p csr_col_ind, \\p csc_val, \\p csc_row_ind, \\p csc_col_ptr or\n              \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n  \\par Example\n  This example computes the transpose of a CSR matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m_A   = 3;\n      rocsparse_int n_A   = 5;\n      rocsparse_int nnz_A = 8;\n\n      csr_row_ptr_A[m+1] = {0, 3, 5, 8};             // device memory\n      csr_col_ind_A[nnz] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      csr_val_A[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8}; // device memory\n\n      // Allocate memory for transposed CSR matrix\n      rocsparse_int m_T   = n_A;\n      rocsparse_int n_T   = m_A;\n      rocsparse_int nnz_T = nnz_A;\n\n      rocsparse_int* csr_row_ptr_T;\n      rocsparse_int* csr_col_ind_T;\n      float* csr_val_T;\n\n      hipMalloc((void**)&csr_row_ptr_T, sizeof(rocsparse_int) * (m_T + 1));\n      hipMalloc((void**)&csr_col_ind_T, sizeof(rocsparse_int) * nnz_T);\n      hipMalloc((void**)&csr_val_T, sizeof(float) * nnz_T);\n\n      // Obtain the temporary buffer size\n      size_t buffer_size;\n      rocsparse_csr2csc_buffer_size(handle,\n                                    m_A,\n                                    n_A,\n                                    nnz_A,\n                      
              csr_row_ptr_A,\n                                    csr_col_ind_A,\n                                    rocsparse_action_numeric,\n                                    &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      rocsparse_scsr2csc(handle,\n                         m_A,\n                         n_A,\n                         nnz_A,\n                         csr_val_A,\n                         csr_row_ptr_A,\n                         csr_col_ind_A,\n                         csr_val_T,\n                         csr_col_ind_T,\n                         csr_row_ptr_T,\n                         rocsparse_action_numeric,\n                         rocsparse_index_base_zero,\n                         temp_buffer);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsr2csc(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        csc_val: *mut f32,
-        csc_row_ind: *mut rocsparse_int,
-        csc_col_ptr: *mut rocsparse_int,
-        copy_values: rocsparse_action,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsr2csc(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        csc_val: *mut f64,
-        csc_row_ind: *mut rocsparse_int,
-        csc_col_ptr: *mut rocsparse_int,
-        copy_values: rocsparse_action,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsr2csc(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        csc_val: *mut rocsparse_float_complex,
-        csc_row_ind: *mut rocsparse_int,
-        csc_col_ptr: *mut rocsparse_int,
-        copy_values: rocsparse_action,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsr2csc(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        csc_val: *mut rocsparse_double_complex,
-        csc_row_ind: *mut rocsparse_int,
-        csc_col_ptr: *mut rocsparse_int,
-        copy_values: rocsparse_action,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse GEneral BSR matrix into a sparse GEneral BSC matrix\n\n  \\details\n  \\p rocsparse_gebsr2gebsc_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_sgebsr2gebsc(), rocsparse_dgebsr2gebsc(), rocsparse_cgebsr2gebsc() and\n  rocsparse_zgebsr2gebsc(). The temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  mb           number of rows of the sparse GEneral BSR matrix.\n  @param[in]\n  nb           number of columns of the sparse GEneral BSR matrix.\n  @param[in]\n  nnzb         number of non-zero entries of the sparse GEneral BSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb*row_block_dim*col_block_dim containing the values of the sparse GEneral BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every row of the\n              sparse GEneral BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the column indices of the sparse\n              GEneral BSR matrix.\n  @param[in]\n  row_block_dim   row size of the blocks in the sparse general BSR matrix.\n  @param[in]\n  col_block_dim   col size of the blocks in the sparse general BSR matrix.\n\n  @param[out]\n  p_buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_sgebsr2gebsc(), rocsparse_dgebsr2gebsc(), rocsparse_cgebsr2gebsc() and\n              rocsparse_zgebsr2gebsc().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     
rocsparse_status_invalid_size \\p mb, \\p nb or \\p nnzb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_row_ptr, \\p bsr_col_ind or\n              \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n/\n/**@{"]
-    pub fn rocsparse_sgebsr2gebsc_buffer_size(
-        handle: rocsparse_handle,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        p_buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dgebsr2gebsc_buffer_size(
-        handle: rocsparse_handle,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        p_buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgebsr2gebsc_buffer_size(
-        handle: rocsparse_handle,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        p_buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgebsr2gebsc_buffer_size(
-        handle: rocsparse_handle,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        p_buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse GEneral BSR matrix into a sparse GEneral BSC matrix\n\n  \\details\n  \\p rocsparse_gebsr2gebsc converts a GEneral BSR matrix into a GEneral BSC matrix. \\p rocsparse_gebsr2gebsc\n  can also be used to convert a GEneral BSC matrix into a GEneral BSR matrix. \\p copy_values decides\n  whether \\p bsc_val is being filled during conversion (\\ref rocsparse_action_numeric)\n  or not (\\ref rocsparse_action_symbolic).\n\n  \\p rocsparse_gebsr2gebsc requires extra temporary storage buffer that has to be allocated\n  by the user. Storage buffer size can be determined by rocsparse_gebsr2gebsc_buffer_size().\n\n  \\note\n  The resulting matrix can also be seen as the transpose of the input matrix.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  mb          number of rows of the sparse GEneral BSR matrix.\n  @param[in]\n  nb          number of columns of the sparse GEneral BSR matrix.\n  @param[in]\n  nnzb        number of non-zero entries of the sparse GEneral BSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb * \\p row_block_dim * \\p col_block_dim  elements of the sparse GEneral BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse GEneral BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              GEneral BSR matrix.\n  @param[in]\n  row_block_dim   row size of the blocks in the sparse general BSR matrix.\n  @param[in]\n  col_block_dim   col size of the blocks in the sparse general BSR matrix.\n  @param[out]\n  bsc_val     array of \\p nnz elements of the sparse BSC matrix.\n  @param[out]\n  bsc_row_ind array of \\p nnz elements containing the row indices of the sparse 
BSC\n              matrix.\n  @param[out]\n  bsc_col_ptr array of \\p n+1 elements that point to the start of every column of the\n              sparse BSC matrix.\n  @param[in]\n  copy_values \\ref rocsparse_action_symbolic or \\ref rocsparse_action_numeric.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user, size is returned by\n              rocsparse_gebsr2gebsc_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb, \\p nb or \\p nnzb is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_val, \\p bsr_row_ptr,\n              \\p bsr_col_ind, \\p bsc_val, \\p bsc_row_ind, \\p bsc_col_ptr or\n              \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_arch_mismatch the device is not supported.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n  \\par Example\n  This example computes the transpose of a GEneral BSR matrix.\n  \\code{.c}\n      //     1 2 0 3\n      // A = 0 4 5 0\n      //     6 0 0 7\n      //     1 2 3 4\n\n      rocsparse_int mb_A   = 2;\n      rocsparse_int row_block_dim = 2;\n      rocsparse_int col_block_dim = 2;\n      rocsparse_int nb_A   = 2;\n      rocsparse_int nnzb_A = 4;\n\n      bsr_row_ptr_A[mb_A+1] = {0, 2, 4};               // device memory\n      bsr_col_ind_A[nnzb_A] = {0, 1, 0, 1}; // device memory\n      bsr_val_A[nnzb_A]     = {1, 0, 2, 4, 0, 5, 3, 0, 6, 1, 0, 2, 0, 3, 7, 4}; // device memory\n\n      // Allocate memory for transposed BSR matrix\n      rocsparse_int mb_T   = nb_A;\n      rocsparse_int nb_T   = mb_A;\n      rocsparse_int nnzb_T = nnzb_A;\n\n      rocsparse_int* bsr_row_ptr_T;\n      rocsparse_int* bsr_col_ind_T;\n      float* bsr_val_T;\n\n      
hipMalloc((void**)&bsr_row_ptr_T, sizeof(rocsparse_int) * (mb_T + 1));\n      hipMalloc((void**)&bsr_col_ind_T, sizeof(rocsparse_int) * nnzb_T);\n      hipMalloc((void**)&bsr_val_T, sizeof(float) * nnzb_T);\n\n      // Obtain the temporary buffer size\n      size_t buffer_size;\n      rocsparse_gebsr2gebsc_buffer_size(handle,\n                                    mb_A,\n                                    nb_A,\n                                    nnzb_A,\n                                    bsr_row_ptr_A,\n                                    bsr_col_ind_A,\n                                    rocsparse_action_numeric,\n                                    &buffer_size);\n\n      // Allocate temporary buffer\n      void* temp_buffer;\n      hipMalloc(&temp_buffer, buffer_size);\n\n      rocsparse_sgebsr2gebsc(handle,\n                         mb_A,\n                         nb_A,\n                         nnzb_A,\n                         bsr_val_A,\n                         bsr_row_ptr_A,\n                         bsr_col_ind_A,\n                         row_block_dim,\n                         col_block_dim,\n                         bsr_val_T,\n                         bsr_col_ind_T,\n                         bsr_row_ptr_T,\n                         rocsparse_action_numeric,\n                         rocsparse_index_base_zero,\n                         temp_buffer);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_sgebsr2gebsc(
-        handle: rocsparse_handle,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *mut f32,
-        bsc_row_ind: *mut rocsparse_int,
-        bsc_col_ptr: *mut rocsparse_int,
-        copy_values: rocsparse_action,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dgebsr2gebsc(
-        handle: rocsparse_handle,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *mut f64,
-        bsc_row_ind: *mut rocsparse_int,
-        bsc_col_ptr: *mut rocsparse_int,
-        copy_values: rocsparse_action,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgebsr2gebsc(
-        handle: rocsparse_handle,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *mut rocsparse_float_complex,
-        bsc_row_ind: *mut rocsparse_int,
-        bsc_col_ptr: *mut rocsparse_int,
-        copy_values: rocsparse_action,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgebsr2gebsc(
-        handle: rocsparse_handle,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsc_val: *mut rocsparse_double_complex,
-        bsc_row_ind: *mut rocsparse_int,
-        bsc_col_ptr: *mut rocsparse_int,
-        copy_values: rocsparse_action,
-        idx_base: rocsparse_index_base,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse ELL matrix\n\n  \\details\n  \\p rocsparse_csr2ell_width computes the maximum of the per row non-zero elements\n  over all rows, the ELL \\p width, for a given CSR matrix.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  ell_descr   descriptor of the sparse ELL matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  ell_width   pointer to the number of non-zero elements per row in ELL storage\n              format.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_descr, \\p csr_row_ptr, or\n              \\p ell_width pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
-    pub fn rocsparse_csr2ell_width(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_row_ptr: *const rocsparse_int,
-        ell_descr: rocsparse_mat_descr,
-        ell_width: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse ELL matrix\n\n  \\details\n  \\p rocsparse_csr2ell converts a CSR matrix into an ELL matrix. It is assumed,\n  that \\p ell_val and \\p ell_col_ind are allocated. Allocation size is computed by the\n  number of rows times the number of ELL non-zero elements per row, such that\n  \\f$\\text{nnz}_{\\text{ELL}} = m \\cdot \\text{ell_width}\\f$. The number of ELL\n  non-zero elements per row is obtained by rocsparse_csr2ell_width().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val     array containing the values of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array containing the column indices of the sparse CSR matrix.\n  @param[in]\n  ell_descr   descriptor of the sparse ELL matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  ell_width   number of non-zero elements per row in ELL storage format.\n  @param[out]\n  ell_val     array of \\p m times \\p ell_width elements of the sparse ELL matrix.\n  @param[out]\n  ell_col_ind array of \\p m times \\p ell_width elements containing the column indices\n              of the sparse ELL matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p ell_width is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_descr, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p ell_descr, \\p ell_val or\n              \\p ell_col_ind pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example converts a CSR matrix into an ELL matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 5;\n      rocsparse_int nnz = 8;\n\n      csr_row_ptr[m+1] = {0, 3, 5, 8};             // device memory\n      csr_col_ind[nnz] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      csr_val[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8}; // device memory\n\n      // Create ELL matrix descriptor\n      rocsparse_mat_descr ell_descr;\n      rocsparse_create_mat_descr(&ell_descr);\n\n      // Obtain the ELL width\n      rocsparse_int ell_width;\n      rocsparse_csr2ell_width(handle,\n                              m,\n                              csr_descr,\n                              csr_row_ptr,\n                              ell_descr,\n                              &ell_width);\n\n      // Compute ELL non-zero entries\n      rocsparse_int ell_nnz = m * ell_width;\n\n      
// Allocate ELL column and value arrays\n      rocsparse_int* ell_col_ind;\n      hipMalloc((void**)&ell_col_ind, sizeof(rocsparse_int) * ell_nnz);\n\n      float* ell_val;\n      hipMalloc((void**)&ell_val, sizeof(float) * ell_nnz);\n\n      // Format conversion\n      rocsparse_scsr2ell(handle,\n                         m,\n                         csr_descr,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind,\n                         ell_descr,\n                         ell_width,\n                         ell_val,\n                         ell_col_ind);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsr2ell(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        ell_descr: rocsparse_mat_descr,
-        ell_width: rocsparse_int,
-        ell_val: *mut f32,
-        ell_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsr2ell(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        ell_descr: rocsparse_mat_descr,
-        ell_width: rocsparse_int,
-        ell_val: *mut f64,
-        ell_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsr2ell(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        ell_descr: rocsparse_mat_descr,
-        ell_width: rocsparse_int,
-        ell_val: *mut rocsparse_float_complex,
-        ell_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsr2ell(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        ell_descr: rocsparse_mat_descr,
-        ell_width: rocsparse_int,
-        ell_val: *mut rocsparse_double_complex,
-        ell_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse HYB matrix\n\n  \\details\n  \\p rocsparse_csr2hyb converts a CSR matrix into a HYB matrix. It is assumed\n  that \\p hyb has been initialized with rocsparse_create_hyb_mat().\n\n  \\note\n  This function requires a significant amount of storage for the HYB matrix,\n  depending on the matrix structure.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSR matrix.\n  @param[in]\n  n               number of columns of the sparse CSR matrix.\n  @param[in]\n  descr           descriptor of the sparse CSR matrix. Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val         array containing the values of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr     array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix.\n  @param[in]\n  csr_col_ind     array containing the column indices of the sparse CSR matrix.\n  @param[out]\n  hyb             sparse matrix in HYB format.\n  @param[in]\n  user_ell_width  width of the ELL part of the HYB matrix (only required if\n                  \\p partition_type == \\ref rocsparse_hyb_partition_user).\n  @param[in]\n  partition_type  \\ref rocsparse_hyb_partition_auto (recommended),\n                  \\ref rocsparse_hyb_partition_user or\n                  \\ref rocsparse_hyb_partition_max.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p user_ell_width is invalid.\n  \\retval     rocsparse_status_invalid_value \\p 
partition_type is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p hyb, \\p csr_val,\n              \\p csr_row_ptr or \\p csr_col_ind pointer is invalid.\n  \\retval     rocsparse_status_memory_error the buffer for the HYB matrix could not be\n              allocated.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example converts a CSR matrix into a HYB matrix using user defined partitioning.\n  \\code{.c}\n      // Create HYB matrix structure\n      rocsparse_hyb_mat hyb;\n      rocsparse_create_hyb_mat(&hyb);\n\n      // User defined ell width\n      rocsparse_int user_ell_width = 5;\n\n      // Perform the conversion\n      rocsparse_scsr2hyb(handle,\n                         m,\n                         n,\n                         descr,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind,\n                         hyb,\n                         user_ell_width,\n                         rocsparse_hyb_partition_user);\n\n      // Do some work\n\n      // Clean up\n      rocsparse_destroy_hyb_mat(hyb);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsr2hyb(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        hyb: rocsparse_hyb_mat,
-        user_ell_width: rocsparse_int,
-        partition_type: rocsparse_hyb_partition,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsr2hyb(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        hyb: rocsparse_hyb_mat,
-        user_ell_width: rocsparse_int,
-        partition_type: rocsparse_hyb_partition,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsr2hyb(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        hyb: rocsparse_hyb_mat,
-        user_ell_width: rocsparse_int,
-        partition_type: rocsparse_hyb_partition,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsr2hyb(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        hyb: rocsparse_hyb_mat,
-        user_ell_width: rocsparse_int,
-        partition_type: rocsparse_hyb_partition,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the number of nonzero block columns per row and the total number of nonzero blocks in a sparse\n  BSR matrix given a sparse CSR matrix as input.\n\n  \\details\n  The routine does support asynchronous execution if the pointer mode is set to device.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         direction that specified whether to count nonzero elements by \\ref rocsparse_direction_row or by\n              \\ref rocsparse_direction_row.\n\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n\n  @param[in]\n  csr_descr    descriptor of the sparse CSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_row_ptr integer array containing \\p m+1 elements that point to the start of each row of the CSR matrix\n\n  @param[in]\n  csr_col_ind integer array of the column indices for each non-zero element in the CSR matrix\n\n  @param[in]\n  block_dim   the block dimension of the BSR matrix. Between 1 and min(m, n)\n\n  @param[in]\n  bsr_descr    descriptor of the sparse BSR matrix. 
Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_row_ptr integer array containing \\p mb+1 elements that point to the start of each block row of the BSR matrix\n\n  @param[out]\n  bsr_nnz     total number of nonzero elements in device or host memory.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr or \\p csr_col_ind or \\p bsr_row_ptr or \\p bsr_nnz\n              pointer is invalid."]
-    pub fn rocsparse_csr2bsr_nnz(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_row_ptr: *mut rocsparse_int,
-        bsr_nnz: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse BSR matrix\n\n  \\details\n  \\p rocsparse_csr2bsr converts a CSR matrix into a BSR matrix. It is assumed,\n  that \\p bsr_val, \\p bsr_col_ind and \\p bsr_row_ptr are allocated. Allocation size\n  for \\p bsr_row_ptr is computed as \\p mb+1 where \\p mb is the number of block rows in\n  the BSR matrix. Allocation size for \\p bsr_val and \\p bsr_col_ind is computed using\n  \\p csr2bsr_nnz() which also fills in \\p bsr_row_ptr.\n\n  \\p rocsparse_csr2bsr requires extra temporary storage that is allocated internally if \\p block_dim>16\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  dir          the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n  @param[in]\n  m            number of rows in the sparse CSR matrix.\n  @param[in]\n  n            number of columns in the sparse CSR matrix.\n  @param[in]\n  csr_descr    descriptor of the sparse CSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val      array of \\p nnz elements containing the values of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr  array of \\p m+1 elements that point to the start of every row of the\n               sparse CSR matrix.\n  @param[in]\n  csr_col_ind  array of \\p nnz elements containing the column indices of the sparse CSR matrix.\n  @param[in]\n  block_dim    size of the blocks in the sparse BSR matrix.\n  @param[in]\n  bsr_descr    descriptor of the sparse BSR matrix. 
Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_val      array of \\p nnzb*block_dim*block_dim containing the values of the sparse BSR matrix.\n  @param[out]\n  bsr_row_ptr  array of \\p mb+1 elements that point to the start of every block row of the\n               sparse BSR matrix.\n  @param[out]\n  bsr_col_ind  array of \\p nnzb elements containing the block column indices of the sparse BSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p csr_val, \\p csr_row_ptr or\n              \\p csr_col_ind pointer is invalid.\n\n  \\par Example\n  This example converts a CSR matrix into an BSR matrix.\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      rocsparse_int m   = 4;\n      rocsparse_int n   = 6;\n      rocsparse_int block_dim = 2;\n      rocsparse_int nnz = 9;\n      rocsparse_int mb = (m + block_dim - 1) / block_dim;\n      rocsparse_int nb = (n + block_dim - 1) / block_dim;\n\n      csr_row_ptr[m+1]  = {0, 2, 4, 7, 9};             // device memory\n      csr_col_ind[nnz]  = {0, 1, 1, 2, 0, 3, 4, 2, 4}; // device memory\n      csr_val[nnz]      = {1, 4, 2, 3, 5, 7, 8, 9, 6}; // device memory\n\n      hipMalloc(&bsr_row_ptr, sizeof(rocsparse_int) *(mb + 1));\n      rocsparse_int nnzb;\n      rocsparse_int* nnzTotalHostPtr = &nnzb;\n      csr2bsr_nnz(handle,\n                  rocsparse_direction_row,\n                  m,\n                  n,\n                  csr_descr,\n                  csr_row_ptr,\n                  csr_col_ind,\n                  block_dim,\n                  bsr_descr,\n           
       bsr_row_ptr,\n                  nnzTotalHostPtr);\n      nnzb = *nnzTotalDevHostPtr;\n      hipMalloc(&bsr_col_ind, sizeof(int)*nnzb);\n      hipMalloc(&bsr_val, sizeof(float)*(block_dim * block_dim) * nnzb);\n      scsr2bsr(handle,\n               rocsparse_direction_row,\n               m,\n               n,\n               csr_descr,\n               csr_val,\n               csr_row_ptr,\n               csr_col_ind,\n               block_dim,\n               bsr_descr,\n               bsr_val,\n               bsr_row_ptr,\n               bsr_col_ind);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsr2bsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut f32,
-        bsr_row_ptr: *mut rocsparse_int,
-        bsr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsr2bsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut f64,
-        bsr_row_ptr: *mut rocsparse_int,
-        bsr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsr2bsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut rocsparse_float_complex,
-        bsr_row_ptr: *mut rocsparse_int,
-        bsr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsr2bsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut rocsparse_double_complex,
-        bsr_row_ptr: *mut rocsparse_int,
-        bsr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Pads a value to the diagonal of the last block (if the last block is a diagonal block) in the sparse BSR matrix\n  when the matrix expands outside m x m\n\n  \\details When converting from a CSR matrix to a BSR matrix the resulting BSR matrix will be larger when m < mb * block_dim.\n  In these situations, the CSR to BSR conversion will expand the BSR matrix to have zeros when outside m x m. This routine\n  converts the resulting BSR matrix to one that has a value on the last diagonal blocks diagonal if this last block is a diagonal\n  block in the BSR matrix.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse BSR matrix.\n  @param[in]\n  mb          number of block rows of the sparse BSR matrix.\n  @param[in]\n  nnzb        number of non-zero blocks of the sparse BSR matrix.\n  @param[in]\n  block_dim   block dimension of the sparse BSR matrix.\n  @param[in]\n  value       scalar value that is set on the diagonal of the last block when the matrix expands outside of \\p m x \\p m\n  @param[in]\n  bsr_descr   descriptor of the sparse BSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[inout]\n  bsr_val     array of \\p nnzb blocks of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of\n              the sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse\n              BSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p mb, \\p nnzb or \\p block_dim is\n              invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_descr, \\p bsr_val,\n              \\p bsr_row_ind, \\p bsr_col_ind, pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sbsrpad_value(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        block_dim: rocsparse_int,
-        value: f32,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsrpad_value(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        block_dim: rocsparse_int,
-        value: f64,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsrpad_value(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        block_dim: rocsparse_int,
-        value: rocsparse_float_complex,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsrpad_value(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        mb: rocsparse_int,
-        nnzb: rocsparse_int,
-        block_dim: rocsparse_int,
-        value: rocsparse_double_complex,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  \\details\n  \\p rocsparse_csr2gebsr_buffer_size returns the size of the temporary buffer that\n  is required by \\p rocsparse_csr2gebcsr_nnz, \\p rocsparse_scsr2gebcsr, \\p rocsparse_dcsr2gebsr,\n  \\p rocsparse_ccsr2gebsr and \\p rocsparse_zcsr2gebsr. The temporary storage buffer must be\n  allocated by the user.\n\n  This function computes the number of nonzero block columns per row and the total number of nonzero blocks in a sparse\n  GEneral BSR matrix given a sparse CSR matrix as input.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         direction that specified whether to count nonzero elements by \\ref rocsparse_direction_row or by\n              \\ref rocsparse_direction_row.\n\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n\n  @param[in]\n  csr_descr    descriptor of the sparse CSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n\n  @param[in]\n  csr_val      array of \\p nnz elements containing the values of the sparse CSR matrix.\n\n  @param[in]\n  csr_row_ptr  integer array containing \\p m+1 elements that point to the start of each row of the CSR matrix\n\n  @param[in]\n  csr_col_ind  integer array of the column indices for each non-zero element in the CSR matrix\n\n  @param[in]\n  row_block_dim   the row block dimension of the GEneral BSR matrix. Between 1 and \\p m\n\n  @param[in]\n  col_block_dim   the col block dimension of the GEneral BSR matrix. 
Between 1 and \\p n\n\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer required by \\p rocsparse_csr2gebsr_nnz and \\p rocsparse_Xcsr2gebsr.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p row_block_dim  \\p col_block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_val or \\p csr_row_ptr or \\p csr_col_ind or \\p bsr_row_ptr or \\p buffer_size\n              pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_scsr2gebsr_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsr2gebsr_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsr2gebsr_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsr2gebsr_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the number of nonzero block columns per row and the total number of nonzero blocks in a sparse\n  GEneral BSR matrix given a sparse CSR matrix as input.\n\n  \\details\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         direction that specified whether to count nonzero elements by \\ref rocsparse_direction_row or by\n              \\ref rocsparse_direction_row.\n\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n\n  @param[in]\n  csr_descr    descriptor of the sparse CSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_row_ptr integer array containing \\p m+1 elements that point to the start of each row of the CSR matrix\n\n  @param[in]\n  csr_col_ind integer array of the column indices for each non-zero element in the CSR matrix\n\n  @param[in]\n  bsr_descr    descriptor of the sparse GEneral BSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_row_ptr integer array containing \\p mb+1 elements that point to the start of each block row of the General BSR matrix\n\n  @param[in]\n  row_block_dim   the row block dimension of the GEneral BSR matrix. Between 1 and min(m, n)\n\n  @param[in]\n  col_block_dim   the col block dimension of the GEneral BSR matrix. 
Between 1 and min(m, n)\n\n  @param[out]\n  bsr_nnz_devhost  total number of nonzero elements in device or host memory.\n\n  @param[in]\n  temp_buffer    buffer allocated by the user whose size is determined by calling \\p rocsparse_xcsr2gebsr_buffer_size.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p row_block_dim \\p col_block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr or \\p csr_col_ind or \\p bsr_row_ptr or \\p bsr_nnz_devhost\n              pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_csr2gebsr_nnz(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_row_ptr: *mut rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        bsr_nnz_devhost: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a sparse GEneral BSR matrix\n\n  \\details\n  \\p rocsparse_csr2gebsr converts a CSR matrix into a GEneral BSR matrix. It is assumed,\n  that \\p bsr_val, \\p bsr_col_ind and \\p bsr_row_ptr are allocated. Allocation size\n  for \\p bsr_row_ptr is computed as \\p mb+1 where \\p mb is the number of block rows in\n  the GEneral BSR matrix. Allocation size for \\p bsr_val and \\p bsr_col_ind is computed using\n  \\p csr2gebsr_nnz() which also fills in \\p bsr_row_ptr.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  dir          the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n  @param[in]\n  m            number of rows in the sparse CSR matrix.\n  @param[in]\n  n            number of columns in the sparse CSR matrix.\n  @param[in]\n  csr_descr    descriptor of the sparse CSR matrix. Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val      array of \\p nnz elements containing the values of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr  array of \\p m+1 elements that point to the start of every row of the\n               sparse CSR matrix.\n  @param[in]\n  csr_col_ind  array of \\p nnz elements containing the column indices of the sparse CSR matrix.\n  @param[in]\n  bsr_descr    descriptor of the sparse BSR matrix. 
Currently, only\n               \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  bsr_val      array of \\p nnzb* \\p row_block_dim* \\p col_block_dim containing the values of the sparse BSR matrix.\n  @param[out]\n  bsr_row_ptr  array of \\p mb+1 elements that point to the start of every block row of the\n               sparse BSR matrix.\n  @param[out]\n  bsr_col_ind  array of \\p nnzb elements containing the block column indices of the sparse BSR matrix.\n  @param[in]\n  row_block_dim    row size of the blocks in the sparse GEneral BSR matrix.\n  @param[in]\n  col_block_dim    col size of the blocks in the sparse GEneral BSR matrix.\n  @param[in]\n  temp_buffer    buffer allocated by the user whose size is determined by calling \\p rocsparse_xcsr2gebsr_buffer_size.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p row_block_dim or \\p col_block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p csr_val, \\p csr_row_ptr or\n              \\p csr_col_ind pointer is invalid.\n\n  \\par Example\n  This example converts a CSR matrix into an BSR matrix.\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      rocsparse_int m   = 4;\n      rocsparse_int n   = 6;\n      rocsparse_int row_block_dim = 2;\n      rocsparse_int col_block_dim = 3;\n      rocsparse_int nnz = 9;\n      rocsparse_int mb = (m + row_block_dim - 1) / row_block_dim;\n      rocsparse_int nb = (n + col_block_dim - 1) / col_block_dim;\n\n      csr_row_ptr[m+1]  = {0, 2, 4, 7, 9};             // device memory\n      csr_col_ind[nnz]  = {0, 1, 1, 2, 0, 3, 4, 2, 4}; // device memory\n      csr_val[nnz]      = {1, 4, 2, 3, 5, 7, 8, 9, 6}; // device 
memory\n\n      hipMalloc(&bsr_row_ptr, sizeof(rocsparse_int) *(mb + 1));\n      rocsparse_int nnzb;\n      rocsparse_int* nnzTotalHostPtr = &nnzb;\n      csr2gebsr_nnz(handle,\n                  rocsparse_direction_row,\n                  m,\n                  n,\n                  csr_descr,\n                  csr_row_ptr,\n                  csr_col_ind,\n                  row_block_dim,\n                  col_block_dim,\n                  bsr_descr,\n                  bsr_row_ptr,\n                  nnzTotalHostPtr);\n      nnzb = *nnzTotalHostPtr;\n      hipMalloc(&bsr_col_ind, sizeof(int)*nnzb);\n      hipMalloc(&bsr_val, sizeof(float)*(row_block_dim * col_block_dim) * nnzb);\n      scsr2gebsr(handle,\n               rocsparse_direction_row,\n               m,\n               n,\n               csr_descr,\n               csr_val,\n               csr_row_ptr,\n               csr_col_ind,\n               row_block_dim,\n               col_block_dim,\n               bsr_descr,\n               bsr_val,\n               bsr_row_ptr,\n               bsr_col_ind);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsr2gebsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut f32,
-        bsr_row_ptr: *mut rocsparse_int,
-        bsr_col_ind: *mut rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsr2gebsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut f64,
-        bsr_row_ptr: *mut rocsparse_int,
-        bsr_col_ind: *mut rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsr2gebsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut rocsparse_float_complex,
-        bsr_row_ptr: *mut rocsparse_int,
-        bsr_col_ind: *mut rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsr2gebsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *const rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *mut rocsparse_double_complex,
-        bsr_row_ptr: *mut rocsparse_int,
-        bsr_col_ind: *mut rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse CSR matrix into a compressed sparse CSR matrix\n\n  \\details\n  \\p rocsparse_csr2csr_compress converts a CSR matrix into a compressed CSR matrix by\n  removing entries in the input CSR matrix that are below a non-negative threshold \\p tol\n\n  \\note\n  In the case of complex matrices only the magnitude of the real part of \\p tol is used.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows of the sparse CSR matrix.\n  @param[in]\n  n             number of columns of the sparse CSR matrix.\n  @param[in]\n  descr_A       matrix descriptor for the CSR matrix\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                uncompressed sparse CSR matrix.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the uncompressed\n                sparse CSR matrix.\n  @param[in]\n  nnz_A         number of elements in the column indices and values arrays of the uncompressed\n                sparse CSR matrix.\n  @param[in]\n  nnz_per_row   array of length \\p m containing the number of entries that will be kept per row in\n                the final compressed CSR matrix.\n  @param[out]\n  csr_val_C     array of \\p nnz_C elements of the compressed sparse CSC matrix.\n  @param[out]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every column of the compressed\n                sparse CSR matrix.\n  @param[out]\n  csr_col_ind_C array of \\p nnz_C elements containing the row indices of the compressed\n                sparse CSR matrix.\n  @param[in]\n  tol           the non-negative tolerance used for 
compression. If \\p tol is complex then only the magnitude\n                of the real part is used. Entries in the input uncompressed CSR array that are below the tolerance\n                are removed in output compressed CSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz_A is invalid.\n  \\retval     rocsparse_status_invalid_value \\p tol is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_val_A, \\p csr_row_ptr_A,\n              \\p csr_col_ind_A, \\p csr_val_C, \\p csr_row_ptr_C, \\p csr_col_ind_C or\n              \\p nnz_per_row pointer is invalid.\n\n  \\par Example\n  This example demonstrates how to compress a CSR matrix. Compressing a CSR matrix involves two steps. First we use\n  nnz_compress() to determine how many entries will be in the final compressed CSR matrix. 
Then we call csr2csr_compress()\n  to finish the compression and fill in the column indices and values arrays of the compressed CSR matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      float tol = 0.0f;\n\n      rocsparse_int m     = 3;\n      rocsparse_int n     = 5;\n      rocsparse_int nnz_A = 8;\n\n      csr_row_ptr_A[m+1]   = {0, 3, 5, 8};             // device memory\n      csr_col_ind_A[nnz_A] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      csr_val_A[nnz_A]     = {1, 0, 3, 4, 0, 6, 7, 0}; // device memory\n\n      // Allocate memory for the row pointer array of the compressed CSR matrix\n      rocsparse_int* csr_row_ptr_C;\n      hipMalloc(csr_row_ptr_C, sizeof(rocsparse_int) * (m + 1));\n\n      // Allocate memory for the nnz_per_row array\n      rocsparse_int* nnz_per_row;\n      hipMalloc(nnz_per_row, sizeof(rocsparse_int) * m);\n\n      // Call nnz_compress() which fills in nnz_per_row array and finds the number\n      // of entries that will be in the compressed CSR matrix\n      rocsparse_int nnz_C;\n      nnz_compress(handle,\n                   m,\n                   descr_A,\n                   csr_val_A,\n                   csr_row_ptr_A,\n                   nnz_per_row,\n                   &nnz_C,\n                   tol);\n\n      // Allocate column indices and values array for the compressed CSR matrix\n      rocsparse_int* csr_col_ind_C;\n      rocsparse_int* csr_val_C;\n      hipMalloc(csr_col_ind_C, sizeof(rocsparse_int) * nnz_C;\n      hipMalloc(csr_val_C, sizeof(rocsparse_int) * nnz_C;\n\n      // Finish compression by calling csr2csr_compress()\n      csr2csr_compress(handle,\n                       m,\n                       n,\n                       descr_A,\n                       csr_val_A,\n                       csr_row_ptr_A,\n                       csr_col_ind_A,\n                       nnz_A,\n                       nnz_per_row,\n                       csr_val_C,\n             
          csr_row_ptr_C,\n                       csr_col_ind_C,\n                       tol);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_scsr2csr_compress(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f32,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        nnz_A: rocsparse_int,
-        nnz_per_row: *const rocsparse_int,
-        csr_val_C: *mut f32,
-        csr_row_ptr_C: *mut rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-        tol: f32,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dcsr2csr_compress(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f64,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        nnz_A: rocsparse_int,
-        nnz_per_row: *const rocsparse_int,
-        csr_val_C: *mut f64,
-        csr_row_ptr_C: *mut rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-        tol: f64,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_ccsr2csr_compress(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        csr_val_A: *const rocsparse_float_complex,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        nnz_A: rocsparse_int,
-        nnz_per_row: *const rocsparse_int,
-        csr_val_C: *mut rocsparse_float_complex,
-        csr_row_ptr_C: *mut rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-        tol: rocsparse_float_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zcsr2csr_compress(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        csr_val_A: *const rocsparse_double_complex,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        nnz_A: rocsparse_int,
-        nnz_per_row: *const rocsparse_int,
-        csr_val_C: *mut rocsparse_double_complex,
-        csr_row_ptr_C: *mut rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-        tol: rocsparse_double_complex,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_prune_csr2csr_buffer_size returns the size of the temporary buffer that\n  is required by \\p rocsparse_sprune_csr2csr_nnz, \\p rocsparse_dprune_csr2csr_nnz,\n  \\p rocsparse_sprune_csr2csr, and \\p rocsparse_dprune_csr2csr. The temporary storage\n  buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  threshold     pointer to the non-negative pruning threshold which can exist in either host or device memory.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_C     array of \\p nnz_C elements containing the values of the sparse CSR matrix C.\n  @param[in]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[in]\n  csr_col_ind_C array of \\p nnz_C elements containing the column indices of the sparse CSR matrix C.\n  @param[out]\n  buffer_size   number of bytes of the temporary storage buffer required by \\p rocsparse_sprune_csr2csr_nnz,\n                \\p rocsparse_dprune_csr2csr_nnz, \\p rocsparse_sprune_csr2csr, and \\p rocsparse_dprune_csr2csr.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n/\n/**@{"]
-    pub fn rocsparse_sprune_csr2csr_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f32,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        threshold: *const f32,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_val_C: *const f32,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *const rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_csr2csr_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f64,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        threshold: *const f64,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_val_C: *const f64,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *const rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_prune_csr2csr_nnz computes the number of nonzero elements per row and the total\n  number of nonzero elements in a sparse CSR matrix once elements less than the threshold are\n  pruned from the matrix.\n\n  \\note The routine does support asynchronous execution if the pointer mode is set to device.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  threshold     pointer to the non-negative pruning threshold which can exist in either host or device memory.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[out]\n  nnz_total_dev_host_ptr total number of nonzero elements in device or host memory.\n  @param[out]\n  temp_buffer   buffer allocated by the user whose size is determined by calling \\p rocsparse_xprune_csr2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p nnz_A is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p threshold or \\p csr_descr_A or \\p csr_descr_C or \\p csr_val_A\n              or \\p csr_row_ptr_A or \\p csr_col_ind_A or \\p csr_row_ptr_C or \\p nnz_total_dev_host_ptr\n              or \\p temp_buffer pointer is invalid.\n\n/\n/**@{"]
-    pub fn rocsparse_sprune_csr2csr_nnz(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f32,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        threshold: *const f32,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_row_ptr_C: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_csr2csr_nnz(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f64,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        threshold: *const f64,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_row_ptr_C: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  This function converts the sparse CSR matrix A into a sparse CSR matrix C by pruning values in A\n  that are less than the threshold. All the parameters are assumed to have been pre-allocated by the user.\n  The user first calls rocsparse_xprune_csr2csr_buffer_size() to determine the size of the buffer used\n  by rocsparse_xprune_csr2csr_nnz() and rocsparse_xprune_csr2csr() which the user then allocates. The user then\n  allocates \\p csr_row_ptr_C to have \\p m+1 elements and then calls rocsparse_xprune_csr2csr_nnz() which fills\n  in the \\p csr_row_ptr_C array stores then number of elements that are larger than the pruning threshold\n  in \\p nnz_total_dev_host_ptr. The user then calls rocsparse_xprune_csr2csr() to complete the conversion.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  threshold     pointer to the non-negative pruning threshold which can exist in either host or device memory.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val_C     array of \\p nnz_C elements containing the values of the sparse CSR matrix C.\n  @param[in]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[out]\n  csr_col_ind_C array of \\p nnz_C elements containing the column indices of the sparse CSR matrix C.\n  @param[in]\n  temp_buffer   buffer allocated by the user whose size is determined by calling \\p rocsparse_xprune_csr2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p nnz_A is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p threshold or \\p csr_descr_A or \\p csr_descr_C or \\p csr_val_A\n              or \\p csr_row_ptr_A or \\p csr_col_ind_A or \\p csr_val_C or \\p csr_row_ptr_C or \\p csr_col_ind_C\n              or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sprune_csr2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f32,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        threshold: *const f32,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_val_C: *mut f32,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_csr2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f64,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        threshold: *const f64,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_val_C: *mut f64,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune by percentage a sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_prune_csr2csr__by_percentage_buffer_size returns the size of the temporary buffer that\n  is required by \\p rocsparse_sprune_csr2csr_nnz_by_percentage, \\p rocsparse_dprune_csr2csr_nnz_by_percentage,\n  \\p rocsparse_sprune_csr2csr_by_percentage, and \\p rocsparse_dprune_csr2csr_by_percentage. The temporary storage\n  buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  percentage     percentage >= 0 and percentage <= 100.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_C     array of \\p nnz_C elements containing the values of the sparse CSR matrix C.\n  @param[in]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[in]\n  csr_col_ind_C array of \\p nnz_C elements containing the column indices of the sparse CSR matrix C.\n  @param[in]\n  info          prune info structure.\n  @param[out]\n  buffer_size   number of bytes of the temporary storage buffer required by \\p rocsparse_sprune_csr2csr_nnz_by_percentage,\n                \\p rocsparse_dprune_csr2csr_nnz_by_percentage, \\p rocsparse_sprune_csr2csr_by_percentage,\n                and \\p rocsparse_dprune_csr2csr_by_percentage.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n/\n/**@{"]
-    pub fn rocsparse_sprune_csr2csr_by_percentage_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f32,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        percentage: f32,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_val_C: *const f32,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_csr2csr_by_percentage_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f64,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        percentage: f64,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_val_C: *const f64,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *const rocsparse_int,
-        info: rocsparse_mat_info,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune by percentage a sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_prune_csr2csr_nnz_by_percentage computes the number of nonzero elements per row and the total\n  number of nonzero elements in a sparse CSR matrix once elements less than the threshold are\n  pruned from the matrix.\n\n  \\note The routine does support asynchronous execution if the pointer mode is set to device.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  percentage    percentage >= 0 and percentage <= 100.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[out]\n  nnz_total_dev_host_ptr total number of nonzero elements in device or host memory.\n  @param[in]\n  info          prune info structure.\n  @param[out]\n  temp_buffer   buffer allocated by the user whose size is determined by calling\n                \\p rocsparse_xprune_csr2csr_by_percentage_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p nnz_A or \\p percentage is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_descr_A or \\p csr_descr_C or \\p info or \\p csr_val_A\n              or \\p csr_row_ptr_A or \\p csr_col_ind_A or \\p csr_row_ptr_C or \\p nnz_total_dev_host_ptr\n              or \\p temp_buffer pointer is invalid.\n\n/\n/**@{"]
-    pub fn rocsparse_sprune_csr2csr_nnz_by_percentage(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f32,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        percentage: f32,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_row_ptr_C: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-        info: rocsparse_mat_info,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_csr2csr_nnz_by_percentage(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f64,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        percentage: f64,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_row_ptr_C: *mut rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-        info: rocsparse_mat_info,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert and prune by percentage a sparse CSR matrix into a sparse CSR matrix\n\n  \\details\n  This function converts the sparse CSR matrix A into a sparse CSR matrix C by pruning values in A\n  that are less than the threshold. All the parameters are assumed to have been pre-allocated by the user.\n  The user first calls rocsparse_xprune_csr2csr_buffer_size() to determine the size of the buffer used\n  by rocsparse_xprune_csr2csr_nnz() and rocsparse_xprune_csr2csr() which the user then allocates. The user then\n  allocates \\p csr_row_ptr_C to have \\p m+1 elements and then calls rocsparse_xprune_csr2csr_nnz() which fills\n  in the \\p csr_row_ptr_C array stores then number of elements that are larger than the pruning threshold\n  in \\p nnz_total_dev_host_ptr. The user then calls rocsparse_xprune_csr2csr() to complete the conversion.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle        handle to the rocsparse library context queue.\n  @param[in]\n  m             number of rows in the sparse CSR matrix.\n  @param[in]\n  n             number of columns in the sparse CSR matrix.\n  @param[in]\n  nnz_A         number of non-zeros in the sparse CSR matrix A.\n  @param[in]\n  csr_descr_A   descriptor of the sparse CSR matrix A. 
Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_val_A     array of \\p nnz_A elements containing the values of the sparse CSR matrix A.\n  @param[in]\n  csr_row_ptr_A array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix A.\n  @param[in]\n  csr_col_ind_A array of \\p nnz_A elements containing the column indices of the sparse CSR matrix A.\n  @param[in]\n  percentage    percentage >= 0 and percentage <= 100.\n  @param[in]\n  csr_descr_C   descriptor of the sparse CSR matrix C. Currently, only\n                \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val_C     array of \\p nnz_C elements containing the values of the sparse CSR matrix C.\n  @param[in]\n  csr_row_ptr_C array of \\p m+1 elements that point to the start of every row of the\n                sparse CSR matrix C.\n  @param[out]\n  csr_col_ind_C array of \\p nnz_C elements containing the column indices of the sparse CSR matrix C.\n  @param[in]\n  info          prune info structure.\n  @param[in]\n  temp_buffer   buffer allocated by the user whose size is determined by calling \\p rocsparse_xprune_csr2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p n or \\p nnz_A or \\p percentage is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_descr_A or \\p csr_descr_C or \\p info or \\p csr_val_A\n              or \\p csr_row_ptr_A or \\p csr_col_ind_A or \\p csr_val_C or \\p csr_row_ptr_C or \\p csr_col_ind_C\n              or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sprune_csr2csr_by_percentage(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f32,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        percentage: f32,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_val_C: *mut f32,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-        info: rocsparse_mat_info,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dprune_csr2csr_by_percentage(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz_A: rocsparse_int,
-        csr_descr_A: rocsparse_mat_descr,
-        csr_val_A: *const f64,
-        csr_row_ptr_A: *const rocsparse_int,
-        csr_col_ind_A: *const rocsparse_int,
-        percentage: f64,
-        csr_descr_C: rocsparse_mat_descr,
-        csr_val_C: *mut f64,
-        csr_row_ptr_C: *const rocsparse_int,
-        csr_col_ind_C: *mut rocsparse_int,
-        info: rocsparse_mat_info,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse COO matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_coo2csr converts the COO array containing the row indices into a\n  CSR array of row offsets, that point to the start of every row.\n  It is assumed that the COO row index array is sorted.\n\n  \\note It can also be used, to convert a COO array containing the column indices into\n  a CSC array of column offsets, that point to the start of every column. Then, it is\n  assumed that the COO column index array is sorted, instead.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  coo_row_ind array of \\p nnz elements containing the row indices of the sparse COO\n              matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[out]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p coo_row_ind or \\p csr_row_ptr\n              pointer is invalid.\n\n  \\par Example\n  This example converts a COO matrix into a CSR matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 5;\n      rocsparse_int nnz = 8;\n\n      coo_row_ind[nnz] = {0, 0, 0, 1, 1, 2, 2, 2}; // device memory\n    
  coo_col_ind[nnz] = {0, 1, 3, 1, 2, 0, 3, 4}; // device memory\n      coo_val[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8}; // device memory\n\n      // Allocate CSR matrix arrays\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n\n      hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnz);\n\n      // Convert the coo row indices into csr row offsets\n      rocsparse_coo2csr(handle,\n                        coo_row_ind,\n                        nnz,\n                        m,\n                        csr_row_ptr,\n                        rocsparse_index_base_zero);\n\n      // Copy the column and value arrays\n      hipMemcpy(csr_col_ind,\n                coo_col_ind,\n                sizeof(rocsparse_int) * nnz,\n                hipMemcpyDeviceToDevice);\n\n      hipMemcpy(csr_val,\n                coo_val,\n                sizeof(float) * nnz,\n                hipMemcpyDeviceToDevice);\n  \\endcode"]
-    pub fn rocsparse_coo2csr(
-        handle: rocsparse_handle,
-        coo_row_ind: *const rocsparse_int,
-        nnz: rocsparse_int,
-        m: rocsparse_int,
-        csr_row_ptr: *mut rocsparse_int,
-        idx_base: rocsparse_index_base,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse ELL matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_ell2csr_nnz computes the total CSR non-zero elements and the CSR\n  row offsets, that point to the start of every row of the sparse CSR matrix, for\n  a given ELL matrix. It is assumed that \\p csr_row_ptr has been allocated with\n  size \\p m+1.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse ELL matrix.\n  @param[in]\n  n           number of columns of the sparse ELL matrix.\n  @param[in]\n  ell_descr   descriptor of the sparse ELL matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  ell_width   number of non-zero elements per row in ELL storage format.\n  @param[in]\n  ell_col_ind array of \\p m times \\p ell_width elements containing the column indices\n              of the sparse ELL matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[out]\n  csr_nnz     pointer to the total number of non-zero elements in CSR storage\n              format.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ell_width is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p ell_descr, \\p ell_col_ind,\n              \\p csr_descr, \\p csr_row_ptr or \\p csr_nnz pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
-    pub fn rocsparse_ell2csr_nnz(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        ell_descr: rocsparse_mat_descr,
-        ell_width: rocsparse_int,
-        ell_col_ind: *const rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_nnz: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse ELL matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_ell2csr converts an ELL matrix into a CSR matrix. It is assumed\n  that \\p csr_row_ptr has already been filled and that \\p csr_val and \\p csr_col_ind\n  are allocated by the user. \\p csr_row_ptr and allocation size of \\p csr_col_ind and\n  \\p csr_val is defined by the number of CSR non-zero elements. Both can be obtained\n  by rocsparse_ell2csr_nnz().\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse ELL matrix.\n  @param[in]\n  n           number of columns of the sparse ELL matrix.\n  @param[in]\n  ell_descr   descriptor of the sparse ELL matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  ell_width   number of non-zero elements per row in ELL storage format.\n  @param[in]\n  ell_val     array of \\p m times \\p ell_width elements of the sparse ELL matrix.\n  @param[in]\n  ell_col_ind array of \\p m times \\p ell_width elements containing the column indices\n              of the sparse ELL matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val     array containing the values of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[out]\n  csr_col_ind array containing the column indices of the sparse CSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p ell_width is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_descr, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind, \\p ell_descr, \\p ell_val or\n              \\p ell_col_ind pointer is invalid.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example converts an ELL matrix into a CSR matrix.\n  \\code{.c}\n      //     1 2 0 3 0\n      // A = 0 4 5 0 0\n      //     6 0 0 7 8\n\n      rocsparse_int m         = 3;\n      rocsparse_int n         = 5;\n      rocsparse_int nnz       = 9;\n      rocsparse_int ell_width = 3;\n\n      ell_col_ind[nnz] = {0, 1, 0, 1, 2, 3, 3, -1, 4}; // device memory\n      ell_val[nnz]     = {1, 4, 6, 2, 5, 7, 3, 0, 8};  // device memory\n\n      // Create CSR matrix descriptor\n      rocsparse_mat_descr csr_descr;\n      rocsparse_create_mat_descr(&csr_descr);\n\n      // Allocate csr_row_ptr array for row offsets\n      rocsparse_int* csr_row_ptr;\n      hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n\n      // Obtain the number of CSR non-zero entries\n      // and fill csr_row_ptr array with row offsets\n      rocsparse_int csr_nnz;\n      rocsparse_ell2csr_nnz(handle,\n                            m,\n                            n,\n                           
 ell_descr,\n                            ell_width,\n                            ell_col_ind,\n                            csr_descr,\n                            csr_row_ptr,\n                            &csr_nnz);\n\n      // Allocate CSR column and value arrays\n      rocsparse_int* csr_col_ind;\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * csr_nnz);\n\n      float* csr_val;\n      hipMalloc((void**)&csr_val, sizeof(float) * csr_nnz);\n\n      // Format conversion\n      rocsparse_sell2csr(handle,\n                         m,\n                         n,\n                         ell_descr,\n                         ell_width,\n                         ell_val,\n                         ell_col_ind,\n                         csr_descr,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_sell2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        ell_descr: rocsparse_mat_descr,
-        ell_width: rocsparse_int,
-        ell_val: *const f32,
-        ell_col_ind: *const rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut f32,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dell2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        ell_descr: rocsparse_mat_descr,
-        ell_width: rocsparse_int,
-        ell_val: *const f64,
-        ell_col_ind: *const rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut f64,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cell2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        ell_descr: rocsparse_mat_descr,
-        ell_width: rocsparse_int,
-        ell_val: *const rocsparse_float_complex,
-        ell_col_ind: *const rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut rocsparse_float_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zell2csr(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        ell_descr: rocsparse_mat_descr,
-        ell_width: rocsparse_int,
-        ell_val: *const rocsparse_double_complex,
-        ell_col_ind: *const rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut rocsparse_double_complex,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse HYB matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_hyb2csr_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_shyb2csr(), rocsparse_dhyb2csr(), rocsparse_chyb2csr() and\n  rocsparse_dhyb2csr(). The temporary storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  descr           descriptor of the sparse HYB matrix. Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  hyb             sparse matrix in HYB format.\n  @param[in]\n  csr_row_ptr     array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_shyb2csr(), rocsparse_dhyb2csr(), rocsparse_chyb2csr() and\n                  rocsparse_zhyb2csr().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p hyb, \\p csr_row_ptr or\n              \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general."]
-    pub fn rocsparse_hyb2csr_buffer_size(
-        handle: rocsparse_handle,
-        descr: rocsparse_mat_descr,
-        hyb: rocsparse_hyb_mat,
-        csr_row_ptr: *const rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse HYB matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_hyb2csr converts a HYB matrix into a CSR matrix.\n\n  \\p rocsparse_hyb2csr requires extra temporary storage buffer that has to be allocated\n  by the user. Storage buffer size can be determined by\n  rocsparse_hyb2csr_buffer_size().\n\n  \\note\n  This function is blocking with respect to the host.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  descr           descriptor of the sparse HYB matrix. Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  hyb             sparse matrix in HYB format.\n  @param[out]\n  csr_val         array containing the values of the sparse CSR matrix.\n  @param[out]\n  csr_row_ptr     array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix.\n  @param[out]\n  csr_col_ind     array containing the column indices of the sparse CSR matrix.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned by\n                  rocsparse_hyb2csr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p hyb, \\p csr_val,\n              \\p csr_row_ptr, \\p csr_col_ind or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  This example converts a HYB matrix into a CSR matrix.\n  \\code{.c}\n      // Create CSR matrix arrays\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n\n      
hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnz);\n\n      // Get required size of temporary buffer\n      size_t size;\n      rocsparse_hyb2csr_buffer_size(handle,\n                                    descr,\n                                    hyb,\n                                    csr_row_ptr,\n                                    &size);\n\n      // Allocate temporary buffer\n      void* buffer;\n      hipMalloc(&buffer, size);\n\n      // Perform the conversion\n      rocsparse_shyb2csr(handle,\n                         descr,\n                         hyb,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind,\n                         buffer);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_shyb2csr(
-        handle: rocsparse_handle,
-        descr: rocsparse_mat_descr,
-        hyb: rocsparse_hyb_mat,
-        csr_val: *mut f32,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dhyb2csr(
-        handle: rocsparse_handle,
-        descr: rocsparse_mat_descr,
-        hyb: rocsparse_hyb_mat,
-        csr_val: *mut f64,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_chyb2csr(
-        handle: rocsparse_handle,
-        descr: rocsparse_mat_descr,
-        hyb: rocsparse_hyb_mat,
-        csr_val: *mut rocsparse_float_complex,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zhyb2csr(
-        handle: rocsparse_handle,
-        descr: rocsparse_mat_descr,
-        hyb: rocsparse_hyb_mat,
-        csr_val: *mut rocsparse_double_complex,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Create the identity map\n\n  \\details\n  \\p rocsparse_create_identity_permutation stores the identity map in \\p p, such that\n  \\f$p = 0:1:(n-1)\\f$.\n\n  \\code{.c}\n      for(i = 0; i < n; ++i)\n      {\n          p[i] = i;\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  n           size of the map \\p p.\n  @param[out]\n  p           array of \\p n integers containing the map.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p n is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p p pointer is invalid.\n\n  \\par Example\n  The following example creates an identity permutation.\n  \\code{.c}\n      rocsparse_int size = 200;\n\n      // Allocate memory to hold the identity map\n      rocsparse_int* perm;\n      hipMalloc((void**)&perm, sizeof(rocsparse_int) * size);\n\n      // Fill perm with the identity permutation\n      rocsparse_create_identity_permutation(handle, size, perm);\n  \\endcode"]
-    pub fn rocsparse_create_identity_permutation(
-        handle: rocsparse_handle,
-        n: rocsparse_int,
-        p: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_csrsort_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_csrsort(). The temporary storage buffer must be allocated by\n  the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSR matrix.\n  @param[in]\n  n               number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr     array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix.\n  @param[in]\n  csr_col_ind     array of \\p nnz elements containing the column indices of the sparse\n                  CSR matrix.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_csrsort().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csr_row_ptr, \\p csr_col_ind or\n              \\p buffer_size pointer is invalid."]
-    pub fn rocsparse_csrsort_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *const rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_csrsort sorts a matrix in CSR format. The sorted permutation vector\n  \\p perm can be used to obtain sorted \\p csr_val array. In this case, \\p perm must be\n  initialized as the identity permutation, see rocsparse_create_identity_permutation().\n\n  \\p rocsparse_csrsort requires extra temporary storage buffer that has to be allocated by\n  the user. Storage buffer size can be determined by rocsparse_csrsort_buffer_size().\n\n  \\note\n  \\p perm can be \\p NULL if a sorted permutation vector is not required.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSR matrix.\n  @param[in]\n  n               number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  descr           descriptor of the sparse CSR matrix. 
Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csr_row_ptr     array of \\p m+1 elements that point to the start of every row of the\n                  sparse CSR matrix.\n  @param[inout]\n  csr_col_ind     array of \\p nnz elements containing the column indices of the sparse\n                  CSR matrix.\n  @param[inout]\n  perm            array of \\p nnz integers containing the unsorted map indices, can be\n                  \\p NULL.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned by\n                  rocsparse_csrsort_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csr_row_ptr, \\p csr_col_ind\n              or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  The following example sorts a \\f$3 \\times 3\\f$ CSR matrix.\n  \\code{.c}\n      //     1 2 3\n      // A = 4 5 6\n      //     7 8 9\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 3;\n      rocsparse_int nnz = 9;\n\n      csr_row_ptr[m + 1] = {0, 3, 6, 9};                // device memory\n      csr_col_ind[nnz]   = {2, 0, 1, 0, 1, 2, 0, 2, 1}; // device memory\n      csr_val[nnz]       = {3, 1, 2, 4, 5, 6, 7, 9, 8}; // device memory\n\n      // Create permutation vector perm as the identity map\n      rocsparse_int* perm;\n      hipMalloc((void**)&perm, sizeof(rocsparse_int) * nnz);\n      rocsparse_create_identity_permutation(handle, nnz, perm);\n\n      // Allocate temporary buffer\n      size_t 
buffer_size;\n      void* temp_buffer;\n      rocsparse_csrsort_buffer_size(handle, m, n, nnz, csr_row_ptr, csr_col_ind, &buffer_size);\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Sort the CSR matrix\n      rocsparse_csrsort(handle, m, n, nnz, descr, csr_row_ptr, csr_col_ind, perm, temp_buffer);\n\n      // Gather sorted csr_val array\n      float* csr_val_sorted;\n      hipMalloc((void**)&csr_val_sorted, sizeof(float) * nnz);\n      rocsparse_sgthr(handle, nnz, csr_val, csr_val_sorted, perm, rocsparse_index_base_zero);\n\n      // Clean up\n      hipFree(temp_buffer);\n      hipFree(perm);\n      hipFree(csr_val);\n  \\endcode"]
-    pub fn rocsparse_csrsort(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csr_row_ptr: *const rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-        perm: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse CSC matrix\n\n  \\details\n  \\p rocsparse_cscsort_buffer_size returns the size of the temporary storage buffer\n  required by rocsparse_cscsort(). The temporary storage buffer must be allocated by\n  the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSC matrix.\n  @param[in]\n  n               number of columns of the sparse CSC matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse CSC matrix.\n  @param[in]\n  csc_col_ptr     array of \\p n+1 elements that point to the start of every column of\n                  the sparse CSC matrix.\n  @param[in]\n  csc_row_ind     array of \\p nnz elements containing the row indices of the sparse\n                  CSC matrix.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_cscsort().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p csc_col_ptr, \\p csc_row_ind or\n              \\p buffer_size pointer is invalid."]
-    pub fn rocsparse_cscsort_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        csc_col_ptr: *const rocsparse_int,
-        csc_row_ind: *const rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse CSC matrix\n\n  \\details\n  \\p rocsparse_cscsort sorts a matrix in CSC format. The sorted permutation vector\n  \\p perm can be used to obtain sorted \\p csc_val array. In this case, \\p perm must be\n  initialized as the identity permutation, see rocsparse_create_identity_permutation().\n\n  \\p rocsparse_cscsort requires extra temporary storage buffer that has to be allocated by\n  the user. Storage buffer size can be determined by rocsparse_cscsort_buffer_size().\n\n  \\note\n  \\p perm can be \\p NULL if a sorted permutation vector is not required.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse CSC matrix.\n  @param[in]\n  n               number of columns of the sparse CSC matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse CSC matrix.\n  @param[in]\n  descr           descriptor of the sparse CSC matrix. 
Currently, only\n                  \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  csc_col_ptr     array of \\p n+1 elements that point to the start of every column of\n                  the sparse CSC matrix.\n  @param[inout]\n  csc_row_ind     array of \\p nnz elements containing the row indices of the sparse\n                  CSC matrix.\n  @param[inout]\n  perm            array of \\p nnz integers containing the unsorted map indices, can be\n                  \\p NULL.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned by\n                  rocsparse_cscsort_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p descr, \\p csc_col_ptr, \\p csc_row_ind\n              or \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n  \\retval     rocsparse_status_not_implemented\n              \\ref rocsparse_matrix_type != \\ref rocsparse_matrix_type_general.\n\n  \\par Example\n  The following example sorts a \\f$3 \\times 3\\f$ CSC matrix.\n  \\code{.c}\n      //     1 2 3\n      // A = 4 5 6\n      //     7 8 9\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 3;\n      rocsparse_int nnz = 9;\n\n      csc_col_ptr[m + 1] = {0, 3, 6, 9};                // device memory\n      csc_row_ind[nnz]   = {2, 0, 1, 0, 1, 2, 0, 2, 1}; // device memory\n      csc_val[nnz]       = {7, 1, 4, 2, 5, 8, 3, 9, 6}; // device memory\n\n      // Create permutation vector perm as the identity map\n      rocsparse_int* perm;\n      hipMalloc((void**)&perm, sizeof(rocsparse_int) * nnz);\n      rocsparse_create_identity_permutation(handle, nnz, perm);\n\n      // Allocate temporary buffer\n      size_t 
buffer_size;\n      void* temp_buffer;\n      rocsparse_cscsort_buffer_size(handle, m, n, nnz, csc_col_ptr, csc_row_ind, &buffer_size);\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Sort the CSC matrix\n      rocsparse_cscsort(handle, m, n, nnz, descr, csc_col_ptr, csc_row_ind, perm, temp_buffer);\n\n      // Gather sorted csc_val array\n      float* csc_val_sorted;\n      hipMalloc((void**)&csc_val_sorted, sizeof(float) * nnz);\n      rocsparse_sgthr(handle, nnz, csc_val, csc_val_sorted, perm, rocsparse_index_base_zero);\n\n      // Clean up\n      hipFree(temp_buffer);\n      hipFree(perm);\n      hipFree(csc_val);\n  \\endcode"]
-    pub fn rocsparse_cscsort(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        descr: rocsparse_mat_descr,
-        csc_col_ptr: *const rocsparse_int,
-        csc_row_ind: *mut rocsparse_int,
-        perm: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse COO matrix\n\n  \\details\n  \\p coosort_buffer_size returns the size of the temporary storage buffer that is\n  required by rocsparse_coosort_by_row() and rocsparse_coosort_by_column(). The\n  temporary storage buffer has to be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse COO matrix.\n  @param[in]\n  n               number of columns of the sparse COO matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse COO matrix.\n  @param[in]\n  coo_row_ind     array of \\p nnz elements containing the row indices of the sparse\n                  COO matrix.\n  @param[in]\n  coo_col_ind     array of \\p nnz elements containing the column indices of the sparse\n                  COO matrix.\n  @param[out]\n  buffer_size     number of bytes of the temporary storage buffer required by\n                  rocsparse_coosort_by_row() and rocsparse_coosort_by_column().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p coo_row_ind, \\p coo_col_ind or\n              \\p buffer_size pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred."]
-    pub fn rocsparse_coosort_buffer_size(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_row_ind: *const rocsparse_int,
-        coo_col_ind: *const rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse COO matrix by row\n\n  \\details\n  \\p rocsparse_coosort_by_row sorts a matrix in COO format by row. The sorted\n  permutation vector \\p perm can be used to obtain sorted \\p coo_val array. In this\n  case, \\p perm must be initialized as the identity permutation, see\n  rocsparse_create_identity_permutation().\n\n  \\p rocsparse_coosort_by_row requires extra temporary storage buffer that has to be\n  allocated by the user. Storage buffer size can be determined by\n  rocsparse_coosort_buffer_size().\n\n  \\note\n  \\p perm can be \\p NULL if a sorted permutation vector is not required.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse COO matrix.\n  @param[in]\n  n               number of columns of the sparse COO matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse COO matrix.\n  @param[inout]\n  coo_row_ind     array of \\p nnz elements containing the row indices of the sparse\n                  COO matrix.\n  @param[inout]\n  coo_col_ind     array of \\p nnz elements containing the column indices of the sparse\n                  COO matrix.\n  @param[inout]\n  perm            array of \\p nnz integers containing the unsorted map indices, can be\n                  \\p NULL.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned by\n                  rocsparse_coosort_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p m, \\p 
n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p coo_row_ind, \\p coo_col_ind or\n              \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n  \\par Example\n  The following example sorts a \\f$3 \\times 3\\f$ COO matrix by row indices.\n  \\code{.c}\n      //     1 2 3\n      // A = 4 5 6\n      //     7 8 9\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 3;\n      rocsparse_int nnz = 9;\n\n      coo_row_ind[nnz] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; // device memory\n      coo_col_ind[nnz] = {0, 0, 0, 1, 1, 1, 2, 2, 2}; // device memory\n      coo_val[nnz]     = {1, 4, 7, 2, 5, 8, 3, 6, 9}; // device memory\n\n      // Create permutation vector perm as the identity map\n      rocsparse_int* perm;\n      hipMalloc((void**)&perm, sizeof(rocsparse_int) * nnz);\n      rocsparse_create_identity_permutation(handle, nnz, perm);\n\n      // Allocate temporary buffer\n      size_t buffer_size;\n      void* temp_buffer;\n      rocsparse_coosort_buffer_size(handle,\n                                    m,\n                                    n,\n                                    nnz,\n                                    coo_row_ind,\n                                    coo_col_ind,\n                                    &buffer_size);\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Sort the COO matrix\n      rocsparse_coosort_by_row(handle,\n                               m,\n                               n,\n                               nnz,\n                               coo_row_ind,\n                               coo_col_ind,\n                               perm,\n                               temp_buffer);\n\n      // Gather sorted coo_val array\n      float* coo_val_sorted;\n      hipMalloc((void**)&coo_val_sorted, sizeof(float) * nnz);\n      rocsparse_sgthr(handle, nnz, coo_val, coo_val_sorted, perm, rocsparse_index_base_zero);\n\n      // Clean up\n     
 hipFree(temp_buffer);\n      hipFree(perm);\n      hipFree(coo_val);\n  \\endcode"]
-    pub fn rocsparse_coosort_by_row(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_row_ind: *mut rocsparse_int,
-        coo_col_ind: *mut rocsparse_int,
-        perm: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Sort a sparse COO matrix by column\n\n  \\details\n  \\p rocsparse_coosort_by_column sorts a matrix in COO format by column. The sorted\n  permutation vector \\p perm can be used to obtain sorted \\p coo_val array. In this\n  case, \\p perm must be initialized as the identity permutation, see\n  rocsparse_create_identity_permutation().\n\n  \\p rocsparse_coosort_by_column requires extra temporary storage buffer that has to be\n  allocated by the user. Storage buffer size can be determined by\n  rocsparse_coosort_buffer_size().\n\n  \\note\n  \\p perm can be \\p NULL if a sorted permutation vector is not required.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle          handle to the rocsparse library context queue.\n  @param[in]\n  m               number of rows of the sparse COO matrix.\n  @param[in]\n  n               number of columns of the sparse COO matrix.\n  @param[in]\n  nnz             number of non-zero entries of the sparse COO matrix.\n  @param[inout]\n  coo_row_ind     array of \\p nnz elements containing the row indices of the sparse\n                  COO matrix.\n  @param[inout]\n  coo_col_ind     array of \\p nnz elements containing the column indices of the sparse\n                  COO matrix.\n  @param[inout]\n  perm            array of \\p nnz integers containing the unsorted map indices, can be\n                  \\p NULL.\n  @param[in]\n  temp_buffer     temporary storage buffer allocated by the user, size is returned by\n                  rocsparse_coosort_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     
rocsparse_status_invalid_size \\p m, \\p n or \\p nnz is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p coo_row_ind, \\p coo_col_ind or\n              \\p temp_buffer pointer is invalid.\n  \\retval     rocsparse_status_internal_error an internal error occurred.\n\n  \\par Example\n  The following example sorts a \\f$3 \\times 3\\f$ COO matrix by column indices.\n  \\code{.c}\n      //     1 2 3\n      // A = 4 5 6\n      //     7 8 9\n      rocsparse_int m   = 3;\n      rocsparse_int n   = 3;\n      rocsparse_int nnz = 9;\n\n      coo_row_ind[nnz] = {0, 0, 0, 1, 1, 1, 2, 2, 2}; // device memory\n      coo_col_ind[nnz] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; // device memory\n      coo_val[nnz]     = {1, 2, 3, 4, 5, 6, 7, 8, 9}; // device memory\n\n      // Create permutation vector perm as the identity map\n      rocsparse_int* perm;\n      hipMalloc((void**)&perm, sizeof(rocsparse_int) * nnz);\n      rocsparse_create_identity_permutation(handle, nnz, perm);\n\n      // Allocate temporary buffer\n      size_t buffer_size;\n      void* temp_buffer;\n      rocsparse_coosort_buffer_size(handle,\n                                    m,\n                                    n,\n                                    nnz,\n                                    coo_row_ind,\n                                    coo_col_ind,\n                                    &buffer_size);\n      hipMalloc(&temp_buffer, buffer_size);\n\n      // Sort the COO matrix\n      rocsparse_coosort_by_column(handle,\n                                  m,\n                                  n,\n                                  nnz,\n                                  coo_row_ind,\n                                  coo_col_ind,\n                                  perm,\n                                  temp_buffer);\n\n      // Gather sorted coo_val array\n      float* coo_val_sorted;\n      hipMalloc((void**)&coo_val_sorted, sizeof(float) * nnz);\n      rocsparse_sgthr(handle, nnz, coo_val, 
coo_val_sorted, perm, rocsparse_index_base_zero);\n\n      // Clean up\n      hipFree(temp_buffer);\n      hipFree(perm);\n      hipFree(coo_val);\n  \\endcode"]
-    pub fn rocsparse_coosort_by_column(
-        handle: rocsparse_handle,
-        m: rocsparse_int,
-        n: rocsparse_int,
-        nnz: rocsparse_int,
-        coo_row_ind: *mut rocsparse_int,
-        coo_col_ind: *mut rocsparse_int,
-        perm: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse BSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_bsr2csr converts a BSR matrix into a CSR matrix. It is assumed,\n  that \\p csr_val, \\p csr_col_ind and \\p csr_row_ptr are allocated. Allocation size\n  for \\p csr_row_ptr is computed by the number of block rows multiplied by the block\n  dimension plus one. Allocation for \\p csr_val and \\p csr_col_ind is computed by the\n  the number of blocks in the BSR matrix multiplied by the block dimension squared.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n  @param[in]\n  mb          number of block rows in the sparse BSR matrix.\n  @param[in]\n  nb          number of block columns in the sparse BSR matrix.\n  @param[in]\n  bsr_descr   descriptor of the sparse BSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb*block_dim*block_dim containing the values of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse BSR matrix.\n  @param[in]\n  block_dim   size of the blocks in the sparse BSR matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val     array of \\p nnzb*block_dim*block_dim elements containing the values of the sparse CSR matrix.\n  @param[out]\n  csr_row_ptr array of \\p m+1 where \\p m=mb*block_dim elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[out]\n  csr_col_ind array of \\p nnzb*block_dim*block_dim elements containing the column indices of the sparse CSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb or \\p nb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p csr_val, \\p csr_row_ptr or\n              \\p csr_col_ind pointer is invalid.\n\n  \\par Example\n  This example converts a BSR matrix into an CSR matrix.\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      rocsparse_int mb   = 2;\n      rocsparse_int nb   = 3;\n      rocsparse_int block_dim = 2;\n      rocsparse_int m = Mb * block_dim;\n      rocsparse_int n = Nb * block_dim;\n\n      bsr_row_ptr[mb+1]                 = {0, 2, 5};                                                    // device memory\n      bsr_col_ind[nnzb]                 = {0, 1, 0, 1, 2};                                              // device memory\n      bsr_val[nnzb*block_dim*block_dim] = {1, 0, 4, 2, 0, 3, 0, 0, 5, 0, 0, 0, 0, 9, 7, 0, 8, 6, 0, 0}; // device memory\n\n      rocsparse_int nnzb = bsr_row_ptr[mb] - bsr_row_ptr[0];\n\n      // Create CSR arrays on device\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n      hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n      
hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnzb * block_dim * block_dim);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnzb * block_dim * block_dim);\n\n      // Create rocsparse handle\n      rocsparse_local_handle handle;\n\n      rocsparse_mat_descr bsr_descr = nullptr;\n      rocsparse_create_mat_descr(&bsr_descr);\n\n      rocsparse_mat_descr csr_descr = nullptr;\n      rocsparse_create_mat_descr(&csr_descr);\n\n      rocsparse_set_mat_index_base(bsr_descr, rocsparse_index_base_zero);\n      rocsparse_set_mat_index_base(csr_descr, rocsparse_index_base_zero);\n\n      // Format conversion\n      rocsparse_sbsr2csr(handle,\n                         rocsparse_direction_column,\n                         mb,\n                         nb,\n                         bsr_descr,\n                         bsr_val,\n                         bsr_row_ptr,\n                         bsr_col_ind,\n                         block_dim,\n                         csr_descr,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_sbsr2csr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut f32,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dbsr2csr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut f64,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cbsr2csr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut rocsparse_float_complex,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zbsr2csr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        block_dim: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut rocsparse_double_complex,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief Convert a sparse general BSR matrix into a sparse CSR matrix\n\n  \\details\n  \\p rocsparse_gebsr2csr converts a BSR matrix into a CSR matrix. It is assumed,\n  that \\p csr_val, \\p csr_col_ind and \\p csr_row_ptr are allocated. Allocation size\n  for \\p csr_row_ptr is computed by the number of block rows multiplied by the block\n  dimension plus one. Allocation for \\p csr_val and \\p csr_col_ind is computed by the\n  the number of blocks in the BSR matrix multiplied by the product of the block dimensions.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir         the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n  @param[in]\n  mb          number of block rows in the sparse general BSR matrix.\n  @param[in]\n  nb          number of block columns in the sparse general BSR matrix.\n  @param[in]\n  bsr_descr   descriptor of the sparse general BSR matrix. Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[in]\n  bsr_val     array of \\p nnzb*row_block_dim*col_block_dim containing the values of the sparse BSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every block row of the\n              sparse BSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the block column indices of the sparse BSR matrix.\n  @param[in]\n  row_block_dim   row size of the blocks in the sparse general BSR matrix.\n  @param[in]\n  col_block_dim   column size of the blocks in the sparse general BSR matrix.\n  @param[in]\n  csr_descr   descriptor of the sparse CSR matrix. 
Currently, only\n              \\ref rocsparse_matrix_type_general is supported.\n  @param[out]\n  csr_val     array of \\p nnzb*row_block_dim*col_block_dim elements containing the values of the sparse CSR matrix.\n  @param[out]\n  csr_row_ptr array of \\p m+1 where \\p m=mb*row_block_dim elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[out]\n  csr_col_ind array of \\p nnzb*block_dim*block_dim elements containing the column indices of the sparse CSR matrix.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb or \\p nb or \\p block_dim is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_val,\n              \\p bsr_row_ptr, \\p bsr_col_ind, \\p csr_val, \\p csr_row_ptr or\n              \\p csr_col_ind pointer is invalid.\n\n  \\par Example\n  This example converts a general BSR matrix into an CSR matrix.\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      rocsparse_int mb   = 2;\n      rocsparse_int nb   = 2;\n      rocsparse_int row_block_dim = 2;\n      rocsparse_int col_block_dim = 3;\n      rocsparse_int m = Mb * row_block_dim;\n      rocsparse_int n = Nb * col_block_dim;\n\n      bsr_row_ptr[mb+1]                 = {0, 1, 3};                                              // device memory\n      bsr_col_ind[nnzb]                 = {0, 0, 1};                                              // device memory\n      bsr_val[nnzb*block_dim*block_dim] = {1, 0, 4, 2, 0, 3, 5, 0, 0, 0, 0, 9, 7, 0, 8, 6, 0, 0}; // device memory\n\n      rocsparse_int nnzb = bsr_row_ptr[mb] - bsr_row_ptr[0];\n\n      // Create CSR arrays on device\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n      hipMalloc((void**)&csr_row_ptr, 
sizeof(rocsparse_int) * (m + 1));\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnzb * row_block_dim * col_block_dim);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnzb * row_block_dim * col_block_dim);\n\n      // Create rocsparse handle\n      rocsparse_local_handle handle;\n\n      rocsparse_mat_descr bsr_descr = nullptr;\n      rocsparse_create_mat_descr(&bsr_descr);\n\n      rocsparse_mat_descr csr_descr = nullptr;\n      rocsparse_create_mat_descr(&csr_descr);\n\n      rocsparse_set_mat_index_base(bsr_descr, rocsparse_index_base_zero);\n      rocsparse_set_mat_index_base(csr_descr, rocsparse_index_base_zero);\n\n      // Format conversion\n      rocsparse_sgebsr2csr(handle,\n                         rocsparse_direction_column,\n                         mb,\n                         nb,\n                         bsr_descr,\n                         bsr_val,\n                         bsr_row_ptr,\n                         bsr_col_ind,\n                         row_block_dim,\n                         col_block_dim,\n                         csr_descr,\n                         csr_val,\n                         csr_row_ptr,\n                         csr_col_ind);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_sgebsr2csr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *const f32,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut f32,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dgebsr2csr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *const f64,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut f64,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgebsr2csr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_float_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut rocsparse_float_complex,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgebsr2csr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        bsr_descr: rocsparse_mat_descr,
-        bsr_val: *const rocsparse_double_complex,
-        bsr_row_ptr: *const rocsparse_int,
-        bsr_col_ind: *const rocsparse_int,
-        row_block_dim: rocsparse_int,
-        col_block_dim: rocsparse_int,
-        csr_descr: rocsparse_mat_descr,
-        csr_val: *mut rocsparse_double_complex,
-        csr_row_ptr: *mut rocsparse_int,
-        csr_col_ind: *mut rocsparse_int,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function computes the the size of the user allocated temporary storage buffer used when converting a sparse\n  general BSR matrix to another sparse general BSR matrix.\n\n  \\details\n  \\p rocsparse_gebsr2gebsr_buffer_size returns the size of the temporary storage buffer\n  that is required by rocsparse_gebsr2gebsr_nnz(), rocsparse_sgebsr2gebsr(), rocsparse_dgebsr2gebsr(),\n  rocsparse_cgebsr2gebsr(), and rocsparse_zgebsr2gebsr(). The temporary\n  storage buffer must be allocated by the user.\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  \\note\n  This routine supports execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n\n  @param[in]\n  mb           number of block rows of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nb           number of block columns of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nnzb         number of blocks in the general BSR sparse matrix \\p A.\n\n  @param[in]\n  descr_A      the descriptor of the general BSR sparse matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  bsr_val_A    array of \\p nnzb*row_block_dim_A*col_block_dim_A containing the values of the sparse general BSR matrix \\p A.\n\n  @param[in]\n  bsr_row_ptr_A array of \\p mb+1 elements that point to the start of every block row of the\n              sparse general BSR matrix \\p A.\n  @param[in]\n  bsr_col_ind_A array of \\p nnzb elements containing the block column indices of the sparse general BSR matrix \\p A.\n\n  @param[in]\n  row_block_dim_A   row size of the blocks in the sparse general BSR 
matrix \\p A.\n\n  @param[in]\n  col_block_dim_A   column size of the blocks in the sparse general BSR matrix \\p A.\n\n  @param[in]\n  row_block_dim_C   row size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[in]\n  col_block_dim_C   column size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by rocsparse_gebsr2gebsr_nnz(),\n              rocsparse_sgebsr2gebsr(), rocsparse_dgebsr2gebsr(), rocsparse_cgebsr2gebsr(), and rocsparse_zgebsr2gebsr().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb or \\p nb or \\p nnzb or \\p row_block_dim_A or\n              \\p col_block_dim_A or \\p row_block_dim_C or \\p col_block_dim_C is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_row_ptr_A or \\p bsr_col_ind_A\n              or \\p descr_A or \\p buffer_size pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sgebsr2gebsr_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        bsr_val_A: *const f32,
-        bsr_row_ptr_A: *const rocsparse_int,
-        bsr_col_ind_A: *const rocsparse_int,
-        row_block_dim_A: rocsparse_int,
-        col_block_dim_A: rocsparse_int,
-        row_block_dim_C: rocsparse_int,
-        col_block_dim_C: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dgebsr2gebsr_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        bsr_val_A: *const f64,
-        bsr_row_ptr_A: *const rocsparse_int,
-        bsr_col_ind_A: *const rocsparse_int,
-        row_block_dim_A: rocsparse_int,
-        col_block_dim_A: rocsparse_int,
-        row_block_dim_C: rocsparse_int,
-        col_block_dim_C: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgebsr2gebsr_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        bsr_val_A: *const rocsparse_float_complex,
-        bsr_row_ptr_A: *const rocsparse_int,
-        bsr_col_ind_A: *const rocsparse_int,
-        row_block_dim_A: rocsparse_int,
-        col_block_dim_A: rocsparse_int,
-        row_block_dim_C: rocsparse_int,
-        col_block_dim_C: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgebsr2gebsr_buffer_size(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        bsr_val_A: *const rocsparse_double_complex,
-        bsr_row_ptr_A: *const rocsparse_int,
-        bsr_col_ind_A: *const rocsparse_int,
-        row_block_dim_A: rocsparse_int,
-        col_block_dim_A: rocsparse_int,
-        row_block_dim_C: rocsparse_int,
-        col_block_dim_C: rocsparse_int,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief This function is used when converting a general BSR sparse matrix \\p A to another general BSR sparse matrix \\p C.\n  Specifically, this function determines the number of non-zero blocks that will exist in \\p C (stored using either a host\n  or device pointer), and computes the row pointer array for \\p C.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n\n  @param[in]\n  mb           number of block rows of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nb           number of block columns of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nnzb         number of blocks in the general BSR sparse matrix \\p A.\n\n  @param[in]\n  descr_A      the descriptor of the general BSR sparse matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  bsr_row_ptr_A array of \\p mb+1 elements that point to the start of every block row of the\n              sparse general BSR matrix \\p A.\n  @param[in]\n  bsr_col_ind_A array of \\p nnzb elements containing the block column indices of the sparse general BSR matrix \\p A.\n\n  @param[in]\n  row_block_dim_A   row size of the blocks in the sparse general BSR matrix \\p A.\n\n  @param[in]\n  col_block_dim_A   column size of the blocks in the sparse general BSR matrix \\p A.\n\n  @param[in]\n  descr_C      the descriptor of the general BSR sparse matrix \\p C, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  bsr_row_ptr_C array of \\p mb_C+1 elements that point to the start of every block 
row of the\n              sparse general BSR matrix \\p C where \\p mb_C=(m+row_block_dim_C-1)/row_block_dim_C.\n  @param[in]\n  row_block_dim_C   row size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[in]\n  col_block_dim_C   column size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[out]\n  nnz_total_dev_host_ptr\n              total number of nonzero blocks in general BSR sparse matrix \\p C stored using device or host memory.\n\n  @param[out]\n  temp_buffer\n              buffer allocated by the user whose size is determined by calling rocsparse_xgebsr2gebsr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb or \\p nb or \\p nnzb or \\p row_block_dim_A or\n              \\p col_block_dim_A or \\p row_block_dim_C or \\p col_block_dim_C is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_row_ptr_A or \\p bsr_col_ind_A\n              or \\p bsr_row_ptr_C or \\p descr_A or \\p descr_C or \\p temp_buffer pointer is invalid."]
-    pub fn rocsparse_gebsr2gebsr_nnz(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        bsr_row_ptr_A: *const rocsparse_int,
-        bsr_col_ind_A: *const rocsparse_int,
-        row_block_dim_A: rocsparse_int,
-        col_block_dim_A: rocsparse_int,
-        descr_C: rocsparse_mat_descr,
-        bsr_row_ptr_C: *mut rocsparse_int,
-        row_block_dim_C: rocsparse_int,
-        col_block_dim_C: rocsparse_int,
-        nnz_total_dev_host_ptr: *mut rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup conv_module\n  \\brief\n  This function converts the general BSR sparse matrix \\p A to another general BSR sparse matrix \\p C.\n\n  \\details\n  The conversion uses three steps. First, the user calls rocsparse_xgebsr2gebsr_buffer_size() to determine the size of\n  the required temporary storage buffer. The user then allocates this buffer. Secondly, the user then allocates \\p mb_C+1\n  integers for the row pointer array for \\p C where \\p mb_C=(m+row_block_dim_C-1)/row_block_dim_C. The user then calls\n  rocsparse_xgebsr2gebsr_nnz() to fill in the row pointer array for \\p C ( \\p bsr_row_ptr_C ) and determine the number of\n  non-zero blocks that will exist in \\p C. Finally, the user allocates space for the colimn indices array of \\p C to have\n  \\p nnzb_C elements and space for the values array of \\p C to have \\p nnzb_C*roc_block_dim_C*col_block_dim_C and then calls\n  rocsparse_xgebsr2gebsr() to complete the conversion.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n\n  @param[in]\n  dir         the storage format of the blocks, \\ref rocsparse_direction_row or \\ref rocsparse_direction_column\n\n  @param[in]\n  mb           number of block rows of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nb           number of block columns of the general BSR sparse matrix \\p A.\n\n  @param[in]\n  nnzb         number of blocks in the general BSR sparse matrix \\p A.\n\n  @param[in]\n  descr_A      the descriptor of the general BSR sparse matrix \\p A, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  bsr_val_A    array of \\p nnzb*row_block_dim_A*col_block_dim_A containing the values of the sparse general BSR matrix \\p A.\n\n  @param[in]\n  bsr_row_ptr_A array of \\p 
mb+1 elements that point to the start of every block row of the\n              sparse general BSR matrix \\p A.\n  @param[in]\n  bsr_col_ind_A array of \\p nnzb elements containing the block column indices of the sparse general BSR matrix \\p A.\n\n  @param[in]\n  row_block_dim_A   row size of the blocks in the sparse general BSR matrix \\p A.\n\n  @param[in]\n  col_block_dim_A   column size of the blocks in the sparse general BSR matrix \\p A.\n\n  @param[in]\n  descr_C      the descriptor of the general BSR sparse matrix \\p C, the supported matrix type is rocsparse_matrix_type_general and also any valid value of the \\ref rocsparse_index_base.\n\n  @param[in]\n  bsr_val_C    array of \\p nnzb_C*row_block_dim_C*col_block_dim_C containing the values of the sparse general BSR matrix \\p C.\n\n  @param[in]\n  bsr_row_ptr_C array of \\p mb_C+1 elements that point to the start of every block row of the\n              sparse general BSR matrix \\p C.\n  @param[in]\n  bsr_col_ind_C array of \\p nnzb_C elements containing the block column indices of the sparse general BSR matrix \\p C.\n\n  @param[in]\n  row_block_dim_C   row size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[in]\n  col_block_dim_C   column size of the blocks in the sparse general BSR matrix \\p C.\n\n  @param[out]\n  temp_buffer\n              buffer allocated by the user whose size is determined by calling rocsparse_xgebsr2gebsr_buffer_size().\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_size \\p mb or \\p nb or \\p nnzb or \\p row_block_dim_A or\n              \\p col_block_dim_A or \\p row_block_dim_C or \\p col_block_dim_C is invalid.\n  \\retval     rocsparse_status_invalid_pointer \\p bsr_row_ptr_A or \\p bsr_col_ind_A or \\p bsr_val_A\n              or \\p bsr_row_ptr_C or \\p bsr_col_ind_C or \\p bsr_val_C or \\p 
descr_A or \\p descr_C\n              or \\p temp_buffer pointer is invalid.\n/\n/**@{"]
-    pub fn rocsparse_sgebsr2gebsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        bsr_val_A: *const f32,
-        bsr_row_ptr_A: *const rocsparse_int,
-        bsr_col_ind_A: *const rocsparse_int,
-        row_block_dim_A: rocsparse_int,
-        col_block_dim_A: rocsparse_int,
-        descr_C: rocsparse_mat_descr,
-        bsr_val_C: *mut f32,
-        bsr_row_ptr_C: *mut rocsparse_int,
-        bsr_col_ind_C: *mut rocsparse_int,
-        row_block_dim_C: rocsparse_int,
-        col_block_dim_C: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_dgebsr2gebsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        bsr_val_A: *const f64,
-        bsr_row_ptr_A: *const rocsparse_int,
-        bsr_col_ind_A: *const rocsparse_int,
-        row_block_dim_A: rocsparse_int,
-        col_block_dim_A: rocsparse_int,
-        descr_C: rocsparse_mat_descr,
-        bsr_val_C: *mut f64,
-        bsr_row_ptr_C: *mut rocsparse_int,
-        bsr_col_ind_C: *mut rocsparse_int,
-        row_block_dim_C: rocsparse_int,
-        col_block_dim_C: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_cgebsr2gebsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        bsr_val_A: *const rocsparse_float_complex,
-        bsr_row_ptr_A: *const rocsparse_int,
-        bsr_col_ind_A: *const rocsparse_int,
-        row_block_dim_A: rocsparse_int,
-        col_block_dim_A: rocsparse_int,
-        descr_C: rocsparse_mat_descr,
-        bsr_val_C: *mut rocsparse_float_complex,
-        bsr_row_ptr_C: *mut rocsparse_int,
-        bsr_col_ind_C: *mut rocsparse_int,
-        row_block_dim_C: rocsparse_int,
-        col_block_dim_C: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_zgebsr2gebsr(
-        handle: rocsparse_handle,
-        dir: rocsparse_direction,
-        mb: rocsparse_int,
-        nb: rocsparse_int,
-        nnzb: rocsparse_int,
-        descr_A: rocsparse_mat_descr,
-        bsr_val_A: *const rocsparse_double_complex,
-        bsr_row_ptr_A: *const rocsparse_int,
-        bsr_col_ind_A: *const rocsparse_int,
-        row_block_dim_A: rocsparse_int,
-        col_block_dim_A: rocsparse_int,
-        descr_C: rocsparse_mat_descr,
-        bsr_val_C: *mut rocsparse_double_complex,
-        bsr_row_ptr_C: *mut rocsparse_int,
-        bsr_col_ind_C: *mut rocsparse_int,
-        row_block_dim_C: rocsparse_int,
-        col_block_dim_C: rocsparse_int,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Scale a sparse vector and add it to a scaled dense vector.\n\n  \\details\n  \\ref rocsparse_axpby multiplies the sparse vector \\f$x\\f$ with scalar \\f$\\alpha\\f$ and\n  adds the result to the dense vector \\f$y\\f$ that is multiplied with scalar\n  \\f$\\beta\\f$, such that\n\n  \\f[\n      y := \\alpha \\cdot x + \\beta \\cdot y\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          y[x_ind[i]] = alpha * x_val[i] + beta * y[x_ind[i]]\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  alpha       scalar \\f$\\alpha\\f$.\n  @param[in]\n  x           sparse matrix descriptor.\n  @param[in]\n  beta        scalar \\f$\\beta\\f$.\n  @param[inout]\n  y           dense matrix descriptor.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_pointer \\p alpha, \\p x, \\p beta or \\p y pointer is\n          invalid."]
-    pub fn rocsparse_axpby(
-        handle: rocsparse_handle,
-        alpha: *const ::std::os::raw::c_void,
-        x: rocsparse_spvec_descr,
-        beta: *const ::std::os::raw::c_void,
-        y: rocsparse_dnvec_descr,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Gather elements from a dense vector and store them into a sparse vector.\n\n  \\details\n  \\ref rocsparse_gather gathers the elements from the dense vector \\f$y\\f$ and stores\n  them in the sparse vector \\f$x\\f$.\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          x_val[i] = y[x_ind[i]];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  y            dense vector \\f$y\\f$.\n  @param[out]\n  x            sparse vector \\f$x\\f$.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p x or \\p y pointer is invalid."]
-    pub fn rocsparse_gather(
-        handle: rocsparse_handle,
-        y: rocsparse_dnvec_descr,
-        x: rocsparse_spvec_descr,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Scatter elements from a sparse vector into a dense vector.\n\n  \\details\n  \\ref rocsparse_scatter scatters the elements from the sparse vector \\f$x\\f$ in the dense\n  vector \\f$y\\f$.\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          y[x_ind[i]] = x_val[i];\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  x            sparse vector \\f$x\\f$.\n  @param[out]\n  y            dense vector \\f$y\\f$.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p x or \\p y pointer is invalid."]
-    pub fn rocsparse_scatter(
-        handle: rocsparse_handle,
-        x: rocsparse_spvec_descr,
-        y: rocsparse_dnvec_descr,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Apply Givens rotation to a dense and a sparse vector.\n\n  \\details\n  \\ref rocsparse_rot applies the Givens rotation matrix \\f$G\\f$ to the sparse vector\n  \\f$x\\f$ and the dense vector \\f$y\\f$, where\n  \\f[\n    G = \\begin{pmatrix} c & s \\\\ -s & c \\end{pmatrix}\n  \\f]\n\n  \\code{.c}\n      for(i = 0; i < nnz; ++i)\n      {\n          x_tmp = x_val[i];\n          y_tmp = y[x_ind[i]];\n\n          x_val[i]    = c * x_tmp + s * y_tmp;\n          y[x_ind[i]] = c * y_tmp - s * x_tmp;\n      }\n  \\endcode\n\n  \\note\n  This function is non blocking and executed asynchronously with respect to the host.\n  It may return before the actual computation has finished.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  c           pointer to the cosine element of \\f$G\\f$, can be on host or device.\n  @param[in]\n  s           pointer to the sine element of \\f$G\\f$, can be on host or device.\n  @param[inout]\n  x           sparse vector \\f$x\\f$.\n  @param[inout]\n  y           dense vector \\f$y\\f$.\n\n  \\retval     rocsparse_status_success the operation completed successfully.\n  \\retval     rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval     rocsparse_status_invalid_pointer \\p c, \\p s, \\p x or \\p y pointer is\n              invalid."]
-    pub fn rocsparse_rot(
-        handle: rocsparse_handle,
-        c: *const ::std::os::raw::c_void,
-        s: *const ::std::os::raw::c_void,
-        x: rocsparse_spvec_descr,
-        y: rocsparse_dnvec_descr,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix to dense matrix conversion\n\n  \\details\n  \\p rocsparse_sparse_to_dense\n  \\p rocsparse_sparse_to_dense performs the conversion of a sparse matrix in CSR, CSC, or COO format to\n     a dense matrix\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the sparse to dense operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  mat_A        sparse matrix descriptor.\n  @param[in]\n  mat_B        dense matrix descriptor.\n  @param[in]\n  alg          algorithm for the sparse to dense computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the sparse to dense operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p mat_A, \\p mat_B, or \\p buffer_size\n               pointer is invalid."]
-    pub fn rocsparse_sparse_to_dense(
-        handle: rocsparse_handle,
-        mat_A: rocsparse_spmat_descr,
-        mat_B: rocsparse_dnmat_descr,
-        alg: rocsparse_sparse_to_dense_alg,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Dense matrix to sparse matrix conversion\n\n  \\details\n  \\p rocsparse_dense_to_sparse\n  \\p rocsparse_dense_to_sparse performs the conversion of a dense matrix to a sparse matrix in CSR, CSC, or COO format.\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the dense to sparse operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  mat_A        dense matrix descriptor.\n  @param[in]\n  mat_B        sparse matrix descriptor.\n  @param[in]\n  alg          algorithm for the sparse to dense computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the dense to sparse operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p mat_A, \\p mat_B, or \\p buffer_size\n               pointer is invalid."]
-    pub fn rocsparse_dense_to_sparse(
-        handle: rocsparse_handle,
-        mat_A: rocsparse_dnmat_descr,
-        mat_B: rocsparse_spmat_descr,
-        alg: rocsparse_dense_to_sparse_alg,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Sparse vector inner dot product\n\n  \\details\n  \\ref rocsparse_spvv computes the inner dot product of the sparse vecotr \\f$x\\f$ with the\n  dense vector \\f$y\\f$, such that\n  \\f[\n    \\text{result} := x^{'} \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(x) = \\left\\{\n    \\begin{array}{ll}\n        x,   & \\text{if trans == rocsparse_operation_none} \\\\\n        \\bar{x}, & \\text{if trans == rocsparse_operation_conjugate_transpose} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\code{.c}\n      result = 0;\n      for(i = 0; i < nnz; ++i)\n      {\n          result += x_val[i] * y[x_ind[i]];\n      }\n  \\endcode\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the SpVV operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans        sparse vector operation type.\n  @param[in]\n  x            sparse vector descriptor.\n  @param[in]\n  y            dense vector descriptor.\n  @param[out]\n  result       pointer to the result, can be host or device memory\n  @param[in]\n  compute_type floating point precision for the SpVV computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpVV operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p x, \\p y, \\p result or \\p buffer_size\n               pointer is invalid.\n  \\retval      rocsparse_status_not_implemented \\p compute_type is currently not\n               supported.\n\n  \\par Example\n  \\code{.c}\n   // Number of non-zeros of the sparse vector\n   int nnz = 3;\n\n   // Size of sparse and dense vector\n   int size = 9;\n\n   // Sparse index vector\n   std::vector<int> hx_ind = {0, 3, 5};\n\n   // Sparse value vector\n   std::vector<float> hx_val = {1.0f, 2.0f, 3.0f};\n\n   // Dense vector\n   std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};\n\n   // Offload data to device\n   int* dx_ind;\n   float* dx_val;\n   float* dy;\n   hipMalloc((void**)&dx_ind, sizeof(int) * nnz);\n   hipMalloc((void**)&dx_val, sizeof(float) * nnz);\n   hipMalloc((void**)&dy, sizeof(float) * size);\n\n   hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dx_val, hx_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);\n   hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);\n\n   rocsparse_handle     handle;\n   rocsparse_spvec_descr vecX;\n   rocsparse_dnvec_descr vecY;\n\n   rocsparse_indextype idx_type = rocsparse_indextype_i32;\n   rocsparse_datatype  data_type = rocsparse_datatype_f32_r;\n   rocsparse_datatype  compute_type = rocsparse_datatype_f32_r;\n   rocsparse_operation trans = rocsparse_operation_none;\n   rocsparse_index_base idx_base = rocsparse_index_base_zero;\n\n   rocsparse_create_handle(&handle);\n\n   // Create sparse vector X\n   
rocsparse_create_spvec_descr(&vecX,\n                                size,\n                                nnz,\n                                dx_ind,\n                                dx_val,\n                                idx_type,\n                                idx_base,\n                                data_type);\n\n   // Create dense vector Y\n   rocsparse_create_dnvec_descr(&vecY,\n                                size,\n                                dy,\n                                data_type);\n\n   // Obtain buffer size\n   float hresult = 0.0f;\n   size_t buffer_size;\n   rocsparse_spvv(handle,\n                  trans,\n                  vecX,\n                  vecY,\n                  &hresult,\n                  compute_type,\n                  &buffer_size,\n                  nullptr);\n\n   void* temp_buffer;\n   hipMalloc(&temp_buffer, buffer_size);\n\n   // SpVV\n   rocsparse_spvv(handle,\n                  trans,\n                  vecX,\n                  vecY,\n                  &hresult,\n                  compute_type,\n                  &buffer_size,\n                  temp_buffer);\n\n   hipDeviceSynchronize();\n\n   std::cout << \"hresult: \" << hresult << std::endl;\n\n   // Clear rocSPARSE\n   rocsparse_destroy_spvec_descr(vecX);\n   rocsparse_destroy_dnvec_descr(vecY);\n   rocsparse_destroy_handle(handle);\n\n   // Clear device memory\n   hipFree(dx_ind);\n   hipFree(dx_val);\n   hipFree(dy);\n   hipFree(temp_buffer);\n  \\endcode"]
-    pub fn rocsparse_spvv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        x: rocsparse_spvec_descr,
-        y: rocsparse_dnvec_descr,
-        result: *mut ::std::os::raw::c_void,
-        compute_type: rocsparse_datatype,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix vector multiplication\n\n  \\details\n  \\ref rocsparse_spmv multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix and the dense vector \\f$x\\f$ and adds the result to the dense vector \\f$y\\f$\n  that is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\details\n  \\ref rocsparse_spmv supports multiple combinations of data types and compute types. The tables below indicate the currently\n  supported different data types that can be used for for the sparse matrix A and the dense vectors X and Y and the compute\n  type for \\f$\\alpha\\f$ and \\f$\\beta\\f$. 
The advantage of using different data types is to save on memory bandwidth and storage\n  when a user application allows while performing the actual computation in a higher precision.\n\n  Uniform Precisions:\n  \\verbatim\n  |----------------------------------------------------|\n  |             A / X / Y / compute_type               |\n  |----------------------------------------------------|\n  |             rocsparse_datatype_f32_r               |\n  |----------------------------------------------------|\n  |             rocsparse_datatype_f64_r               |\n  |----------------------------------------------------|\n  |             rocsparse_datatype_f32_c               |\n  |----------------------------------------------------|\n  |             rocsparse_datatype_f64_c               |\n  |----------------------------------------------------|\n  \\endverbatim\n\n  Mixed precisions:\n  \\verbatim\n  |-------------------------|--------------------------|--------------------------|\n  |         A / X           |             Y            |       compute_type       |\n  |-------------------------|--------------------------|--------------------------|\n  | rocsparse_datatype_i8_r | rocsparse_datatype_i32_r | rocsparse_datatype_i32_r |\n  |-------------------------|--------------------------|--------------------------|\n  | rocsparse_datatype_i8_r | rocsparse_datatype_f32_r | rocsparse_datatype_f32_r |\n  |-------------------------|--------------------------|--------------------------|\n  | rocsparse_datatype_i8_r | rocsparse_datatype_i32_r | rocsparse_datatype_i32_r |\n  |-------------------------|--------------------------|--------------------------|\n  \\endverbatim\n\n  Mixed-regular Complex precisions\n  \\verbatim\n  |----------------------------|----------------------------|\n  |              A             |    X / Y / compute_type    |\n  |----------------------------|----------------------------|\n  |  rocsparse_datatype_f32_r  |  rocsparse_datatype_f32_c  
|\n  |----------------------------|----------------------------|\n  |  rocsparse_datatype_f64_r  |  rocsparse_datatype_f64_c  |\n  |----------------------------|----------------------------|\n  \\endverbatim\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the SpMV operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note\n  Only the \\ref rocsparse_spmv_stage_buffer_size stage and the \\ref rocsparse_spmv_stage_compute stage are non blocking\n  and executed asynchronously with respect to the host. They may return before the actual computation has finished.\n  The \\ref rocsparse_spmv_stage_preprocess stage is blocking with respect to the host.\n\n  \\note\n  Only the \\ref rocsparse_spmv_stage_buffer_size stage and the \\ref rocsparse_spmv_stage_compute stage\n  support execution in a hipGraph context. The \\ref rocsparse_spmv_stage_preprocess stage does not support hipGraph.\n\n  \\note\n  The sparse matrix formats currently supported are: rocsparse_format_bsr, rocsparse_format_coo,\n  rocsparse_format_coo_aos, rocsparse_format_csr, rocsparse_format_csc and rocsparse_format_ell.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans        matrix operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  mat          matrix descriptor.\n  @param[in]\n  x            vector descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[inout]\n  y            vector descriptor.\n  @param[in]\n  compute_type floating point precision for the SpMV computation.\n  @param[in]\n  alg          SpMV algorithm for the SpMV computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpMV operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p mat, \\p x, \\p beta, \\p y or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_invalid_value the value of \\p trans, \\p trans\\_B, \\p compute\\_type, \\p alg is incorrect.\n  \\retval      rocsparse_status_not_implemented \\p compute_type or \\p alg is\n               currently not supported."]
-    pub fn rocsparse_spmv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        mat: rocsparse_spmat_descr,
-        x: rocsparse_dnvec_descr,
-        beta: *const ::std::os::raw::c_void,
-        y: rocsparse_dnvec_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_spmv_alg,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix vector multiplication\n\n  \\details\n  \\ref rocsparse_spmv_ex multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times n\\f$\n  matrix and the dense vector \\f$x\\f$ and adds the result to the dense vector \\f$y\\f$\n  that is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    y := \\alpha \\cdot op(A) \\cdot x + \\beta \\cdot y,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\details\n  \\ref rocsparse_spmv supports multiple combinations of data types and compute types. The tables below indicate the currently\n  supported different data types that can be used for for the sparse matrix A and the dense vectors X and Y and the compute\n  type for \\f$\\alpha\\f$ and \\f$\\beta\\f$. 
The advantage of using different data types is to save on memory bandwidth and storage\n  when a user application allows while performing the actual computation in a higher precision.\n\n  Uniform Precisions:\n  \\verbatim\n  |----------------------------------------------------|\n  |             A / X / Y / compute_type               |\n  |----------------------------------------------------|\n  |             rocsparse_datatype_f32_r               |\n  |----------------------------------------------------|\n  |             rocsparse_datatype_f64_r               |\n  |----------------------------------------------------|\n  |             rocsparse_datatype_f32_c               |\n  |----------------------------------------------------|\n  |             rocsparse_datatype_f64_c               |\n  |----------------------------------------------------|\n  \\endverbatim\n\n  Mixed precisions:\n  \\verbatim\n  |-------------------------|--------------------------|--------------------------|\n  |         A / X           |             Y            |       compute_type       |\n  |-------------------------|--------------------------|--------------------------|\n  | rocsparse_datatype_i8_r | rocsparse_datatype_i32_r | rocsparse_datatype_i32_r |\n  |-------------------------|--------------------------|--------------------------|\n  | rocsparse_datatype_i8_r | rocsparse_datatype_f32_r | rocsparse_datatype_f32_r |\n  |-------------------------|--------------------------|--------------------------|\n  | rocsparse_datatype_i8_r | rocsparse_datatype_i32_r | rocsparse_datatype_i32_r |\n  |-------------------------|--------------------------|--------------------------|\n  \\endverbatim\n\n  Mixed-regular Complex precisions\n  \\verbatim\n  |----------------------------|----------------------------|\n  |              A             |    X / Y / compute_type    |\n  |----------------------------|----------------------------|\n  |  rocsparse_datatype_f32_r  |  rocsparse_datatype_f32_c  
|\n  |----------------------------|----------------------------|\n  |  rocsparse_datatype_f64_r  |  rocsparse_datatype_f64_c  |\n  |----------------------------|----------------------------|\n  \\endverbatim\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the SpMV operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note\n  The sparse matrix formats currently supported are: rocsparse_format_bsr, rocsparse_format_coo,\n  rocsparse_format_coo_aos, rocsparse_format_csr, rocsparse_format_csc and rocsparse_format_ell.\n\n  \\note SpMV_ex requires three stages to complete. The first stage\n  \\ref rocsparse_spmv_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls to \\ref rocsparse_spmv_ex. The second stage\n  \\ref rocsparse_spmv_stage_preprocess will preprocess data that would be saved in the temporary storage buffer.\n  In the final stage \\ref rocsparse_spmv_stage_compute, the actual computation is performed.\n  \\note If \\ref rocsparse_spmv_stage_auto is selected, rocSPARSE will automatically detect\n  which stage is required based on the following indicators:\n  If \\p temp_buffer is equal to \\p nullptr, the required buffer size will be returned.\n  Else, the SpMV_ex preprocess and the SpMV algorithm will be executed.\n\n  \\note\n  Only the \\ref rocsparse_spmv_stage_buffer_size stage and the \\ref rocsparse_spmv_stage_compute stage are non blocking\n  and executed asynchronously with respect to the host. They may return before the actual computation has finished.\n  The \\ref rocsparse_spmv_stage_preprocess stage is blocking with respect to the host.\n\n  \\note\n  Only the \\ref rocsparse_spmv_stage_buffer_size stage and the \\ref rocsparse_spmv_stage_compute stage\n  support execution in a hipGraph context. 
The \\ref rocsparse_spmv_stage_preprocess stage does not support hipGraph.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans        matrix operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  mat          matrix descriptor.\n  @param[in]\n  x            vector descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[inout]\n  y            vector descriptor.\n  @param[in]\n  compute_type floating point precision for the SpMV computation.\n  @param[in]\n  alg          SpMV algorithm for the SpMV computation.\n  @param[in]\n  stage        SpMV stage for the SpMV computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpMV operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p mat, \\p x, \\p beta, \\p y or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_invalid_value the value of \\p trans, \\p trans\\_B, \\p compute\\_type, \\p alg or \\p stage is incorrect.\n  \\retval      rocsparse_status_not_implemented \\p compute_type or \\p alg is\n               currently not supported."]
-    pub fn rocsparse_spmv_ex(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        mat: rocsparse_spmat_descr,
-        x: rocsparse_dnvec_descr,
-        beta: *const ::std::os::raw::c_void,
-        y: rocsparse_dnvec_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_spmv_alg,
-        stage: rocsparse_spmv_stage,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Sparse triangular solve\n\n  \\details\n  \\p rocsparse_spsv_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR or COO storage format, a dense solution vector\n  \\f$y\\f$ and the right-hand side \\f$x\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot y = \\alpha \\cdot x,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note SpSV requires three stages to complete. The first stage\n  \\ref rocsparse_spsv_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls. The second stage\n  \\ref rocsparse_spsv_stage_preprocess will preprocess data that would be saved in the temporary storage buffer.\n  In the final stage \\ref rocsparse_spsv_stage_compute, the actual computation is performed.\n  \\note If \\ref rocsparse_spsv_stage_auto is selected, rocSPARSE will automatically detect\n  which stage is required based on the following indicators:\n  If \\p temp_buffer is equal to \\p nullptr, the required buffer size will be returned.\n  If \\p buffer_size is equal to \\p nullptr, analysis will be performed.\n  Otherwise, the SpSV preprocess and the SpSV algorithm will be executed.\n\n  \\note\n  Only the \\ref rocsparse_spsv_stage_buffer_size stage and the \\ref rocsparse_spsv_stage_compute stage are non blocking\n  and executed asynchronously with respect to the host. 
They may return before the actual computation has finished.\n  The \\ref rocsparse_spsv_stage_preprocess stage is blocking with respect to the host.\n\n  \\note\n  Currently, only \\p trans == \\ref rocsparse_operation_none and \\p trans == \\ref rocsparse_operation_transpose is supported.\n\n  \\note\n  Only the \\ref rocsparse_spsv_stage_buffer_size stage and the \\ref rocsparse_spsv_stage_compute stage\n  support execution in a hipGraph context. The \\ref rocsparse_spsv_stage_preprocess stage does not support hipGraph.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans        matrix operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  mat          matrix descriptor.\n  @param[in]\n  x            vector descriptor.\n  @param[inout]\n  y            vector descriptor.\n  @param[in]\n  compute_type floating point precision for the SpSV computation.\n  @param[in]\n  alg          SpSV algorithm for the SpSV computation.\n  @param[in]\n  stage        SpSV stage for the SpSV computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpSV operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p mat, \\p x, \\p y, \\p descr or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_not_implemented \\p trans, \\p compute_type, \\p stage or \\p alg is\n               currently not supported."]
-    pub fn rocsparse_spsv(
-        handle: rocsparse_handle,
-        trans: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        mat: rocsparse_spmat_descr,
-        x: rocsparse_dnvec_descr,
-        y: rocsparse_dnvec_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_spsv_alg,
-        stage: rocsparse_spsv_stage,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Sparse iterative triangular solve\n\n  \\details\n  \\p rocsparse_spitsv solves, using the Jacobi iterative method, a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR format, a dense solution vector\n  \\f$y\\f$ and the right-hand side \\f$x\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot y = \\alpha \\cdot x,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note SpITSV requires three stages to complete. The first stage\n  \\ref rocsparse_spitsv_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls. The second stage\n  \\ref rocsparse_spitsv_stage_preprocess will preprocess data that would be saved in the temporary storage buffer.\n  In the final stage \\ref rocsparse_spitsv_stage_compute, the actual computation is performed.\n  \\note If \\ref rocsparse_spitsv_stage_auto is selected, rocSPARSE will automatically detect\n  which stage is required based on the following indicators:\n  If \\p temp_buffer is equal to \\p nullptr, the required buffer size will be returned.\n  If \\p buffer_size is equal to \\p nullptr, analysis will be performed.\n  Otherwise, the SpITSV preprocess and the SpITSV iterative algorithm will be executed.\n\n  \\note\n  Currently, only non-mixed numerical precision is supported.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[inout]\n  host_nmaxiter     maximum number of iteration on input and maximum number of iteration on output.\n  @param[in]\n  host_tol          if the pointer is null then loop will execute \\p nmaxiter[0] iterations. 
The precision is float for f32 based calculation (including the complex case) and double for f64 based calculation (including the complex case).\n  @param[out]\n  host_history      Optional array to record the history. The precision is float for f32 based calculation (including the complex case) and double for f64 based calculation (including the complex case).\n  @param[in]\n  trans        matrix operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  mat          matrix descriptor.\n  @param[in]\n  x            vector descriptor.\n  @param[inout]\n  y            vector descriptor.\n  @param[in]\n  compute_type floating point precision for the SpITSV computation.\n  @param[in]\n  alg          SpITSV algorithm for the SpITSV computation.\n  @param[in]\n  stage        SpITSV stage for the SpITSV computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpITSV operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p mat, \\p x, \\p y, \\p descr or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_not_implemented \\p trans, \\p compute_type, \\p stage or \\p alg is\n               currently not supported."]
-    pub fn rocsparse_spitsv(
-        handle: rocsparse_handle,
-        host_nmaxiter: *mut rocsparse_int,
-        host_tol: *const ::std::os::raw::c_void,
-        host_history: *mut ::std::os::raw::c_void,
-        trans: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        mat: rocsparse_spmat_descr,
-        x: rocsparse_dnvec_descr,
-        y: rocsparse_dnvec_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_spitsv_alg,
-        stage: rocsparse_spitsv_stage,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Sparse triangular system solve\n\n  \\details\n  \\p rocsparse_spsm_solve solves a sparse triangular linear system of a sparse\n  \\f$m \\times m\\f$ matrix, defined in CSR or COO storage format, a dense solution matrix\n  \\f$C\\f$ and the right-hand side \\f$B\\f$ that is multiplied by \\f$\\alpha\\f$, such that\n  \\f[\n    op(A) \\cdot C = \\alpha \\cdot op(B),\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note SpSM requires three stages to complete. The first stage\n  \\ref rocsparse_spsm_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls. 
The second stage\n  \\ref rocsparse_spsm_stage_preprocess will preprocess data that would be saved in the temporary storage buffer.\n  In the final stage \\ref rocsparse_spsm_stage_compute, the actual computation is performed.\n  \\note If \\ref rocsparse_spsm_stage_auto is selected, rocSPARSE will automatically detect\n  which stage is required based on the following indicators:\n  If \\p temp_buffer is equal to \\p nullptr, the required buffer size will be returned.\n  If \\p buffer_size is equal to \\p nullptr, analysis will be performed.\n  Otherwise, the SpSM preprocess and the SpSM algorithm will be executed.\n\n  \\note\n  Only the \\ref rocsparse_spsm_stage_buffer_size stage and the \\ref rocsparse_spsm_stage_compute stage are non blocking\n  and executed asynchronously with respect to the host. They may return before the actual computation has finished.\n  The \\ref rocsparse_spsm_stage_preprocess stage is blocking with respect to the host.\n\n  \\note\n  Currently, only \\p trans_A == \\ref rocsparse_operation_none and \\p trans_A == \\ref rocsparse_operation_transpose is supported.\n  Currently, only \\p trans_B == \\ref rocsparse_operation_none and \\p trans_B == \\ref rocsparse_operation_transpose is supported.\n\n  \\note\n  Only the \\ref rocsparse_spsm_stage_buffer_size stage and the \\ref rocsparse_spsm_stage_compute stage\n  support execution in a hipGraph context. 
The \\ref rocsparse_spsm_stage_preprocess stage does not support hipGraph.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans_A      matrix operation type for the sparse matrix A.\n  @param[in]\n  trans_B      matrix operation type for the dense matrix B.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  matA          sparse matrix descriptor.\n  @param[in]\n  matB          dense matrix descriptor.\n  @param[inout]\n  matC          dense matrix descriptor.\n  @param[in]\n  compute_type floating point precision for the SpSM computation.\n  @param[in]\n  alg          SpSM algorithm for the SpSM computation.\n  @param[in]\n  stage        SpSM stage for the SpSM computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpSM operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p matA, \\p matB, \\p matC, \\p descr or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_not_implemented \\p trans_A, \\p trans_B, \\p compute_type, \\p stage or \\p alg is\n               currently not supported."]
-    pub fn rocsparse_spsm(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        matA: rocsparse_spmat_descr,
-        matB: rocsparse_dnmat_descr,
-        matC: rocsparse_dnmat_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_spsm_alg,
-        stage: rocsparse_spsm_stage,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix dense matrix multiplication, extension routine.\n\n  \\details\n  \\p rocsparse_spmm (or \\p rocsparse_spmm_ex ) multiplies the scalar \\f$\\alpha\\f$ with a sparse \\f$m \\times k\\f$\n  matrix \\f$A\\f$, defined in CSR or COO or Blocked ELL storage format, and the dense \\f$k \\times n\\f$\n  matrix \\f$B\\f$ and adds the result to the dense \\f$m \\times n\\f$ matrix \\f$C\\f$ that\n  is multiplied by the scalar \\f$\\beta\\f$, such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot C,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note\n  Only the \\ref rocsparse_spmm_stage_buffer_size stage and the \\ref rocsparse_spmm_stage_compute stage are non blocking\n  and executed asynchronously with respect to the host. They may return before the actual computation has finished.\n  The \\ref rocsparse_spmm_stage_preprocess stage is blocking with respect to the host.\n\n  \\note\n  Currently, only \\p trans_A == \\ref rocsparse_operation_none is supported for COO and Blocked ELL formats.\n\n  \\note\n  Only the \\ref rocsparse_spmm_stage_buffer_size stage and the \\ref rocsparse_spmm_stage_compute stage\n  support execution in a hipGraph context. 
The \\ref rocsparse_spmm_stage_preprocess stage does not support hipGraph.\n\n  \\note\n  Currently, only CSR, COO and Blocked ELL sparse formats are supported.\n\n  \\note\n  Different algorithms are available which can provide better performance for different matrices.\n  Currently, the available algorithms are rocsparse_spmm_alg_csr, rocsparse_spmm_alg_csr_row_split\n  or rocsparse_spmm_alg_csr_merge for CSR matrices, rocsparse_spmm_alg_bell for Blocked ELL matrices and\n  rocsparse_spmm_alg_coo_segmented or rocsparse_spmm_alg_coo_atomic for COO matrices. Additionally,\n  one can specify the algorithm to be rocsparse_spmm_alg_default. In the case of CSR matrices this will\n  set the algorithm to be rocsparse_spmm_alg_csr, in the case of Blocked ELL matrices this will set the\n  algorithm to be rocsparse_spmm_alg_bell and for COO matrices it will set the algorithm to be\n  rocsparse_spmm_alg_coo_atomic. When A is transposed, rocsparse_spmm will revert to using\n  rocsparse_spmm_alg_csr for CSR format and rocsparse_spmm_alg_coo_atomic for COO format regardless\n  of algorithm selected.\n\n  \\note\n  This function writes the required allocation size (in bytes) to \\p buffer_size and\n  returns without performing the SpMM operation, when a nullptr is passed for\n  \\p temp_buffer.\n\n  \\note SpMM requires three stages to complete. The first stage\n  \\ref rocsparse_spmm_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls to \\ref rocsparse_spmm (or \\ref rocsparse_spmm_ex). 
The second stage\n  \\ref rocsparse_spmm_stage_preprocess will preprocess data that would be saved in the temporary storage buffer.\n  In the final stage \\ref rocsparse_spmm_stage_compute, the actual computation is performed.\n  \\note If \\ref rocsparse_spmm_stage_auto is selected, rocSPARSE will automatically detect\n  which stage is required based on the following indicators:\n  If \\p temp_buffer is equal to \\p nullptr, the required buffer size will be returned.\n  Else, the SpMM preprocess and the SpMM algorithm will be executed.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans_A      matrix operation type.\n  @param[in]\n  trans_B      matrix operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  mat_A        matrix descriptor.\n  @param[in]\n  mat_B        matrix descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[in]\n  mat_C        matrix descriptor.\n  @param[in]\n  compute_type floating point precision for the SpMM computation.\n  @param[in]\n  alg          SpMM algorithm for the SpMM computation.\n  @param[in]\n  stage        SpMM stage for the SpMM computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpMM operation.\n\n  \\retval      rocsparse_status_success the operation completed successfully.\n  \\retval      rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval      rocsparse_status_invalid_pointer \\p alpha, \\p mat_A, \\p mat_B, \\p mat_C, \\p beta, or\n               \\p buffer_size pointer is invalid.\n  \\retval      rocsparse_status_not_implemented \\p trans_A, \\p trans_B, \\p compute_type or \\p alg is\n               currently not supported.\n  \\par Example\n  This example performs sparse matrix-dense matrix multiplication, C = alpha * A * B + beta * C\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      //     1 4 2\n      //     1 2 3\n      // B = 5 4 0\n      //     3 1 9\n      //     1 2 2\n      //     0 3 0\n\n      //     1 1 5\n      // C = 1 2 1\n      //     1 3 1\n      //     6 2 4\n\n      rocsparse_int m   = 4;\n      rocsparse_int k   = 6;\n      rocsparse_int n   = 3;\n\n      csr_row_ptr[m + 1] = {0, 1, 3};                                              // device memory\n      csr_col_ind[nnz]   = {0, 0, 1};                                              // device memory\n      csr_val[nnz]       = {1, 0, 4, 2, 0, 3, 5, 0, 0, 0, 0, 9, 7, 0, 8, 6, 0, 0}; // device memory\n\n      B[k * n]       = {1, 1, 5, 3, 1, 0, 4, 2, 4, 1, 2, 3, 2, 3, 0, 9, 2, 0};     // device memory\n      C[m * n]       = {1, 1, 1, 6, 1, 2, 3, 2, 5, 1, 1, 4};                       // device memory\n\n      rocsparse_int nnz = csr_row_ptr[m] - csr_row_ptr[0];\n\n      float alpha = 1.0f;\n      float beta  = 0.0f;\n\n      // Create CSR arrays on device\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n      float* B;\n      float* C;\n      
hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnz);\n      hipMalloc((void**)&B, sizeof(float) * k * n);\n      hipMalloc((void**)&C, sizeof(float) * m * n);\n\n      // Create rocsparse handle\n      rocsparse_local_handle handle;\n\n      // Types\n      rocsparse_indextype itype = rocsparse_indextype_i32;\n      rocsparse_indextype jtype = rocsparse_indextype_i32;\n      rocsparse_datatype  ttype = rocsparse_datatype_f32_r;\n\n      // Create descriptors\n      rocsparse_spmat_descr mat_A;\n      rocsparse_dnmat_descr mat_B;\n      rocsparse_dnmat_descr mat_C;\n\n      rocsparse_create_csr_descr(&mat_A, m, k, nnz, csr_row_ptr, csr_col_ind, csr_val, itype, jtype, rocsparse_index_base_zero, ttype);\n      rocsparse_create_dnmat_descr(&mat_B, k, n, k, B, ttype, rocsparse_order_column);\n      rocsparse_create_dnmat_descr(&mat_C, m, n, m, C, ttype, rocsparse_order_column);\n\n      // Query SpMM buffer\n      size_t buffer_size;\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n                     &beta,\n                     mat_C,\n                     ttype,\n                     rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_buffer_size,\n                     &buffer_size,\n                     nullptr));\n\n      // Allocate buffer\n      void* buffer;\n      hipMalloc(&buffer, buffer_size);\n\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n                     &beta,\n                     mat_C,\n                     ttype,\n                     
rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_preprocess,\n                     &buffer_size,\n                     buffer));\n\n      // Pointer mode host\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n                     &beta,\n                     mat_C,\n                     ttype,\n                     rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_compute,\n                     &buffer_size,\n                     buffer));\n\n      // Clear up on device\n      hipFree(csr_row_ptr);\n      hipFree(csr_col_ind);\n      hipFree(csr_val);\n      hipFree(B);\n      hipFree(C);\n      hipFree(temp_buffer);\n\n      rocsparse_destroy_spmat_descr(mat_A);\n      rocsparse_destroy_dnmat_descr(mat_B);\n      rocsparse_destroy_dnmat_descr(mat_C);\n  \\endcode\n\n  \\par Example\n  SpMM also supports batched computation for CSR and COO matrices. There are three supported batch modes:\n      C_i = A * B_i\n      C_i = A_i * B\n      C_i = A_i * B_i\n  The batch mode is determined by the batch count and stride passed for each matrix. 
For example\n  to use the first batch mode (C_i = A * B_i) with 100 batches for non-transposed A, B, and C, one passes:\n      batch_count_A = 1\n      batch_count_B = 100\n      batch_count_C = 100\n      offsets_batch_stride_A        = 0\n      columns_values_batch_stride_A = 0\n      batch_stride_B                = k * n\n      batch_stride_C                = m * n\n  To use the second batch mode (C_i = A_i * B) one could use:\n      batch_count_A = 100\n      batch_count_B = 1\n      batch_count_C = 100\n      offsets_batch_stride_A        = m + 1\n      columns_values_batch_stride_A = nnz\n      batch_stride_B                = 0\n      batch_stride_C                = m * n\n  And to use the third batch mode (C_i = A_i * B_i) one could use:\n      batch_count_A = 100\n      batch_count_B = 100\n      batch_count_C = 100\n      offsets_batch_stride_A        = m + 1\n      columns_values_batch_stride_A = nnz\n      batch_stride_B                = k * n\n      batch_stride_C                = m * n\n  An example of the first batch mode (C_i = A * B_i) is provided below.\n  \\code{.c}\n      //     1 4 0 0 0 0\n      // A = 0 2 3 0 0 0\n      //     5 0 0 7 8 0\n      //     0 0 9 0 6 0\n\n      rocsparse_int m   = 4;\n      rocsparse_int k   = 6;\n      rocsparse_int n   = 3;\n\n      csr_row_ptr[m + 1] = {0, 1, 3};                                              // device memory\n      csr_col_ind[nnz]   = {0, 0, 1};                                              // device memory\n      csr_val[nnz]       = {1, 0, 4, 2, 0, 3, 5, 0, 0, 0, 0, 9, 7, 0, 8, 6, 0, 0}; // device memory\n\n      B[batch_count_B * k * n]       = {...};     // device memory\n      C[batch_count_C * m * n]       = {...};     // device memory\n\n      rocsparse_int nnz = csr_row_ptr[m] - csr_row_ptr[0];\n\n      rocsparse_int batch_count_A = 1;\n      rocsparse_int batch_count_B = 100;\n      rocsparse_int batch_count_C = 100;\n\n      rocsparse_int offsets_batch_stride_A        = 0;\n      
rocsparse_int columns_values_batch_stride_A = 0;\n      rocsparse_int batch_stride_B                = k * n;\n      rocsparse_int batch_stride_C                = m * n;\n\n      float alpha = 1.0f;\n      float beta  = 0.0f;\n\n      // Create CSR arrays on device\n      rocsparse_int* csr_row_ptr;\n      rocsparse_int* csr_col_ind;\n      float* csr_val;\n      float* B;\n      float* C;\n      hipMalloc((void**)&csr_row_ptr, sizeof(rocsparse_int) * (m + 1));\n      hipMalloc((void**)&csr_col_ind, sizeof(rocsparse_int) * nnz);\n      hipMalloc((void**)&csr_val, sizeof(float) * nnz);\n      hipMalloc((void**)&B, sizeof(float) * batch_count_B * k * n);\n      hipMalloc((void**)&C, sizeof(float) * batch_count_C * m * n);\n\n      // Create rocsparse handle\n      rocsparse_local_handle handle;\n\n      // Types\n      rocsparse_indextype itype = rocsparse_indextype_i32;\n      rocsparse_indextype jtype = rocsparse_indextype_i32;\n      rocsparse_datatype  ttype = rocsparse_datatype_f32_r;\n\n      // Create descriptors\n      rocsparse_spmat_descr mat_A;\n      rocsparse_dnmat_descr mat_B;\n      rocsparse_dnmat_descr mat_C;\n\n      rocsparse_create_csr_descr(&mat_A, m, k, nnz, csr_row_ptr, csr_col_ind, csr_val, itype, jtype, rocsparse_index_base_zero, ttype);\n      rocsparse_create_dnmat_descr(&mat_B, k, n, k, B, ttype, rocsparse_order_column);\n      rocsparse_create_dnmat_descr(&mat_C, m, n, m, C, ttype, rocsparse_order_column);\n\n      rocsparse_csr_set_strided_batch(mat_A, batch_count_A, offsets_batch_stride_A, columns_values_batch_stride_A);\n      rocsparse_dnmat_set_strided_batch(B, batch_count_B, batch_stride_B);\n      rocsparse_dnmat_set_strided_batch(C, batch_count_C, batch_stride_C);\n\n      // Query SpMM buffer\n      size_t buffer_size;\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n 
                    &beta,\n                     mat_C,\n                     ttype,\n                     rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_buffer_size,\n                     &buffer_size,\n                     nullptr));\n\n      // Allocate buffer\n      void* buffer;\n      hipMalloc(&buffer, buffer_size);\n\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n                     &beta,\n                     mat_C,\n                     ttype,\n                     rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_preprocess,\n                     &buffer_size,\n                     buffer));\n\n      // Pointer mode host\n      rocsparse_spmm(handle,\n                     rocsparse_operation_none,\n                     rocsparse_operation_none,\n                     &alpha,\n                     mat_A,\n                     mat_B,\n                     &beta,\n                     mat_C,\n                     ttype,\n                     rocsparse_spmm_alg_default,\n                     rocsparse_spmm_stage_compute,\n                     &buffer_size,\n                     buffer));\n\n      // Clear up on device\n      hipFree(csr_row_ptr);\n      hipFree(csr_col_ind);\n      hipFree(csr_val);\n      hipFree(B);\n      hipFree(C);\n      hipFree(temp_buffer);\n\n      rocsparse_destroy_spmat_descr(mat_A);\n      rocsparse_destroy_dnmat_descr(mat_B);\n      rocsparse_destroy_dnmat_descr(mat_C);\n  \\endcode\n/\n/**@{"]
-    pub fn rocsparse_spmm_ex(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        mat_A: rocsparse_spmat_descr,
-        mat_B: rocsparse_dnmat_descr,
-        beta: *const ::std::os::raw::c_void,
-        mat_C: rocsparse_dnmat_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_spmm_alg,
-        stage: rocsparse_spmm_stage,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    pub fn rocsparse_spmm(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        mat_A: rocsparse_spmat_descr,
-        mat_B: rocsparse_dnmat_descr,
-        beta: *const ::std::os::raw::c_void,
-        mat_C: rocsparse_dnmat_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_spmm_alg,
-        stage: rocsparse_spmm_stage,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Sparse matrix sparse matrix multiplication\n\n  \\details\n  \\ref rocsparse_spgemm multiplies the scalar \\f$\\alpha\\f$ with the sparse\n  \\f$m \\times k\\f$ matrix \\f$A\\f$ and the sparse \\f$k \\times n\\f$ matrix \\f$B\\f$ and\n  adds the result to the sparse \\f$m \\times n\\f$ matrix \\f$D\\f$ that is multiplied by\n  \\f$\\beta\\f$. The final result is stored in the sparse \\f$m \\times n\\f$ matrix \\f$C\\f$,\n  such that\n  \\f[\n    C := \\alpha \\cdot op(A) \\cdot op(B) + \\beta \\cdot D,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if trans_A == rocsparse_operation_none} \\\\\n        A^T, & \\text{if trans_A == rocsparse_operation_transpose} \\\\\n        A^H, & \\text{if trans_A == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n  and\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if trans_B == rocsparse_operation_none} \\\\\n        B^T, & \\text{if trans_B == rocsparse_operation_transpose} \\\\\n        B^H, & \\text{if trans_B == rocsparse_operation_conjugate_transpose}\n    \\end{array}\n    \\right.\n  \\f]\n\n  \\note SpGEMM requires three stages to complete. The first stage\n  \\ref rocsparse_spgemm_stage_buffer_size will return the size of the temporary storage buffer\n  that is required for subsequent calls to \\ref rocsparse_spgemm. The second stage\n  \\ref rocsparse_spgemm_stage_nnz will determine the number of non-zero elements of the\n  resulting \\f$C\\f$ matrix. If the sparsity pattern of \\f$C\\f$ is already known, this\n  stage can be skipped. 
In the final stage \\ref rocsparse_spgemm_stage_compute, the actual\n  computation is performed.\n  \\note If \\ref rocsparse_spgemm_stage_auto is selected, rocSPARSE will automatically detect\n  which stage is required based on the following indicators:\n  If \\p temp_buffer is equal to \\p nullptr, the required buffer size will be returned.\n  Else, if the number of non-zeros of \\f$C\\f$ is zero, the number of non-zero entries will be\n  computed.\n  Else, the SpGEMM algorithm will be executed.\n  \\note If \\f$\\alpha == 0\\f$, then \\f$C = \\beta \\cdot D\\f$ will be computed.\n  \\note If \\f$\\beta == 0\\f$, then \\f$C = \\alpha \\cdot op(A) \\cdot op(B)\\f$ will be\n  computed.\n  \\note Currently only CSR and BSR formats are supported.\n  \\note If \\ref rocsparse_spgemm_stage_symbolic is selected then the symbolic computation is performed only.\n  \\note If \\ref rocsparse_spgemm_stage_numeric is selected then the numeric computation is performed only.\n  \\note For the \\ref rocsparse_spgemm_stage_symbolic and \\ref rocsparse_spgemm_stage_numeric stages, only\n  CSR matrix format is currently supported.\n  \\note \\f$\\alpha == beta == 0\\f$ is invalid.\n  \\note It is allowed to pass the same sparse matrix for \\f$C\\f$ and \\f$D\\f$, if both\n  matrices have the same sparsity pattern.\n  \\note Currently, only \\p trans_A == \\ref rocsparse_operation_none is supported.\n  \\note Currently, only \\p trans_B == \\ref rocsparse_operation_none is supported.\n  \\note This function is non blocking and executed asynchronously with respect to the\n        host. 
It may return before the actual computation has finished.\n  \\note Please note, that for rare matrix products with more than 4096 non-zero entries\n  per row, additional temporary storage buffer is allocated by the algorithm.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  trans_A      sparse matrix \\f$A\\f$ operation type.\n  @param[in]\n  trans_B      sparse matrix \\f$B\\f$ operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  A            sparse matrix \\f$A\\f$ descriptor.\n  @param[in]\n  B            sparse matrix \\f$B\\f$ descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[in]\n  D            sparse matrix \\f$D\\f$ descriptor.\n  @param[out]\n  C            sparse matrix \\f$C\\f$ descriptor.\n  @param[in]\n  compute_type floating point precision for the SpGEMM computation.\n  @param[in]\n  alg          SpGEMM algorithm for the SpGEMM computation.\n  @param[in]\n  stage        SpGEMM stage for the SpGEMM computation.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer. buffer_size is set when\n               \\p temp_buffer is nullptr.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user. 
When a nullptr is passed,\n               the required allocation size (in bytes) is written to \\p buffer_size and\n               function returns without performing the SpGEMM operation.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p A, \\p B, \\p D, \\p C or \\p buffer_size pointer is invalid.\n  \\retval rocsparse_status_memory_error additional buffer for long rows could not be\n          allocated.\n  \\retval rocsparse_status_not_implemented\n          \\p trans_A != \\ref rocsparse_operation_none or\n          \\p trans_B != \\ref rocsparse_operation_none."]
-    pub fn rocsparse_spgemm(
-        handle: rocsparse_handle,
-        trans_A: rocsparse_operation,
-        trans_B: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        A: rocsparse_spmat_descr,
-        B: rocsparse_spmat_descr,
-        beta: *const ::std::os::raw::c_void,
-        D: rocsparse_spmat_descr,
-        C: rocsparse_spmat_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_spgemm_alg,
-        stage: rocsparse_spgemm_stage,
-        buffer_size: *mut usize,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief  Sampled Dense-Dense Matrix Multiplication.\n\n  \\details\n  \\ref rocsparse_sddmm multiplies the scalar \\f$\\alpha\\f$ with the dense\n  \\f$m \\times k\\f$ matrix \\f$A\\f$, the dense \\f$k \\times n\\f$ matrix \\f$B\\f$, filtered by the sparsity pattern of the \\f$m \\times n\\f$ sparse matrix \\f$C\\f$ and\n  adds the result to \\f$C\\f$ scaled by\n  \\f$\\beta\\f$. The final result is stored in the sparse \\f$m \\times n\\f$ matrix \\f$C\\f$,\n  such that\n  \\f[\n    C := \\alpha ( opA(A) \\cdot opB(B) ) \\cdot spy(C) + \\beta C,\n  \\f]\n  with\n  \\f[\n    op(A) = \\left\\{\n    \\begin{array}{ll}\n        A,   & \\text{if opA == rocsparse_operation_none} \\\\\n        A^T,   & \\text{if opA == rocsparse_operation_transpose} \\\\\n    \\end{array}\n    \\right.\n  \\f],\n  \\f[\n    op(B) = \\left\\{\n    \\begin{array}{ll}\n        B,   & \\text{if opB == rocsparse_operation_none} \\\\\n        B^T,   & \\text{if opB == rocsparse_operation_transpose} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n   and\n  \\f[\n    spy(C)_ij = \\left\\{\n    \\begin{array}{ll}\n        1 \\text{if i == j},   & 0 \\text{if i != j} \\\\\n    \\end{array}\n    \\right.\n  \\f]\n  \\note \\p opA == \\ref rocsparse_operation_conjugate_transpose is not supported.\n  \\note \\p opB == \\ref rocsparse_operation_conjugate_transpose is not supported.\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  opA      dense matrix \\f$A\\f$ operation type.\n  @param[in]\n  opB      dense matrix \\f$B\\f$ operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  A            dense matrix \\f$A\\f$ descriptor.\n  @param[in]\n  B            dense matrix \\f$B\\f$ descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[inout]\n  C            sparse matrix \\f$C\\f$ descriptor.\n  @param[in]\n  compute_type floating point precision for the SDDMM 
computation.\n  @param[in]\n  alg specification of the algorithm to use.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user.\n  The size must be greater or equal to the size obtained with \\ref rocsparse_sddmm_buffer_size.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_value the value of \\p trans\\_A, \\p trans\\_B, \\p compute\\_type or alg is incorrect.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p A, \\p B, \\p D, \\p C or \\p temp_buffer pointer is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p opA == \\ref rocsparse_operation_conjugate_transpose or\n          \\p opB == \\ref rocsparse_operation_conjugate_transpose."]
-    pub fn rocsparse_sddmm(
-        handle: rocsparse_handle,
-        opA: rocsparse_operation,
-        opB: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        A: rocsparse_dnmat_descr,
-        B: rocsparse_dnmat_descr,
-        beta: *const ::std::os::raw::c_void,
-        C: rocsparse_spmat_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_sddmm_alg,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Calculate the size in bytes of the required buffer for the use of \\ref rocsparse_sddmm and \\ref rocsparse_sddmm_preprocess\n\n  \\details\n  \\ref rocsparse_sddmm_buffer_size returns the size of the required buffer to execute the SDDMM operation from a given configuration.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  opA      dense matrix \\f$A\\f$ operation type.\n  @param[in]\n  opB      dense matrix \\f$B\\f$ operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  A            dense matrix \\f$A\\f$ descriptor.\n  @param[in]\n  B            dense matrix \\f$B\\f$ descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[inout]\n  C            sparse matrix \\f$C\\f$ descriptor.\n  @param[in]\n  compute_type floating point precision for the SDDMM computation.\n  @param[in]\n  alg specification of the algorithm to use.\n  @param[out]\n  buffer_size  number of bytes of the temporary storage buffer.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_value the value of \\p trans\\_A or \\p trans\\_B is incorrect.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p A, \\p B, \\p D, \\p C or \\p buffer_size pointer is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p opA == \\ref rocsparse_operation_conjugate_transpose or\n          \\p opB == \\ref rocsparse_operation_conjugate_transpose."]
-    pub fn rocsparse_sddmm_buffer_size(
-        handle: rocsparse_handle,
-        opA: rocsparse_operation,
-        opB: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        A: rocsparse_dnmat_descr,
-        B: rocsparse_dnmat_descr,
-        beta: *const ::std::os::raw::c_void,
-        C: rocsparse_spmat_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_sddmm_alg,
-        buffer_size: *mut usize,
-    ) -> rocsparse_status;
-}
-extern "C" {
-    #[must_use]
-    #[doc = " \\ingroup generic_module\n  \\brief Preprocess data before the use of \\ref rocsparse_sddmm.\n\n  \\details\n  \\ref rocsparse_sddmm_preprocess executes a part of the algorithm that can be calculated once in the context of multiple calls of the \\ref rocsparse_sddmm\n  with the same sparsity pattern.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n  @param[in]\n  handle       handle to the rocsparse library context queue.\n  @param[in]\n  opA      dense matrix \\f$A\\f$ operation type.\n  @param[in]\n  opB      dense matrix \\f$B\\f$ operation type.\n  @param[in]\n  alpha        scalar \\f$\\alpha\\f$.\n  @param[in]\n  A            dense matrix \\f$A\\f$ descriptor.\n  @param[in]\n  B            dense matrix \\f$B\\f$ descriptor.\n  @param[in]\n  beta         scalar \\f$\\beta\\f$.\n  @param[inout]\n  C            sparse matrix \\f$C\\f$ descriptor.\n  @param[in]\n  compute_type floating point precision for the SDDMM computation.\n  @param[in]\n  alg specification of the algorithm to use.\n  @param[in]\n  temp_buffer  temporary storage buffer allocated by the user.\n  The size must be greater or equal to the size obtained with \\ref rocsparse_sddmm_buffer_size.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_value the value of \\p trans\\_A or \\p trans\\_B is incorrect.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_pointer \\p alpha and \\p beta are invalid,\n          \\p A, \\p B, \\p D, \\p C or \\p temp_buffer pointer is invalid.\n  \\retval rocsparse_status_not_implemented\n          \\p opA == \\ref rocsparse_operation_conjugate_transpose or\n          \\p opB == \\ref rocsparse_operation_conjugate_transpose."]
-    pub fn rocsparse_sddmm_preprocess(
-        handle: rocsparse_handle,
-        opA: rocsparse_operation,
-        opB: rocsparse_operation,
-        alpha: *const ::std::os::raw::c_void,
-        A: rocsparse_dnmat_descr,
-        B: rocsparse_dnmat_descr,
-        beta: *const ::std::os::raw::c_void,
-        C: rocsparse_spmat_descr,
-        compute_type: rocsparse_datatype,
-        alg: rocsparse_sddmm_alg,
-        temp_buffer: *mut ::std::os::raw::c_void,
-    ) -> rocsparse_status;
-}
 extern "C" {
     #[must_use]
     #[doc = " \\ingroup reordering_module\n  \\brief Coloring of the adjacency graph of the matrix \\f$A\\f$ stored in the CSR format.\n\n  \\details\n  \\p rocsparse_csrcolor performs the coloring of the undirected graph represented by the (symmetric) sparsity pattern of the\n  matrix \\f$A\\f$ stored in CSR format. Graph coloring is a way of coloring the nodes of a graph such that no two adjacent nodes\n  are of the same color. The \\p fraction_to_color is a parameter to only color a given percentage of the graph nodes, the\n  remaining uncolored nodes receive distinct new colors. The optional \\p reordering array is a permutation array such that\n  unknowns of the same color are grouped. The matrix \\f$A\\f$ must be stored as a general matrix with a symmetric sparsity pattern,\n  and if the matrix \\f$A\\f$ is non-symmetric then the user is responsible to provide the symmetric part \\f$\\frac{A+A^T}{2}\\f$.\n\n  \\note\n  This function is blocking with respect to the host.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of sparse matrix \\f$A\\f$.\n  @param[in]\n  nnz         number of non-zero entries of sparse matrix \\f$A\\f$.\n  @param[in]\n  descr      sparse matrix descriptor.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  fraction_to_color  fraction of nodes to be colored, which should be in the interval [0.0,1.0], for example 0.8 implies that 80 percent of nodes will be colored.\n  @param[out]\n  ncolors      resulting number of distinct colors.\n  @param[out]\n  coloring     resulting mapping of 
colors.\n  @param[out]\n  reordering   optional resulting reordering permutation if \\p reordering is a non-null pointer.\n  @param[inout]\n  info    structure that holds the information collected during the coloring algorithm.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_size \\p m or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p descr, \\p csr_val, \\p csr_row_ptr, \\p csr_col_ind, \\p fraction_to_color, \\p ncolors, \\p coloring or \\p info pointer is invalid.\n/\n/**@{"]
@@ -10629,3 +10323,922 @@ extern "C" {
         info: rocsparse_mat_info,
     ) -> rocsparse_status;
 }
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_coo_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_coo\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse COO matrix.\n  @param[in]\n  n           number of columns of the sparse COO matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse COO matrix.\n  @param[in]\n  coo_val     array of \\p nnz elements of the sparse COO matrix.\n  @param[in]\n  coo_row_ind array of \\p nnz elements containing the row indices of the sparse\n              COO matrix.\n  @param[in]\n  coo_col_ind array of \\p nnz elements containing the column indices of the sparse\n              COO matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_coo(), rocsparse_dcheck_matrix_coo(),\n              rocsparse_ccheck_matrix_coo() and rocsparse_zcheck_matrix_coo().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval 
rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p coo_val, \\p coo_row_ind, \\p coo_col_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_coo_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_val: *const f32,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_coo_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_val: *const f64,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_coo_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_val: *const rocsparse_float_complex,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_coo_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_val: *const rocsparse_double_complex,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_coo checks if the input COO matrix is valid.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse COO matrix.\n  @param[in]\n  n           number of columns of the sparse COO matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse COO matrix.\n  @param[in]\n  coo_val     array of \\p nnz elements of the sparse COO matrix.\n  @param[in]\n  coo_row_ind array of \\p nnz elements containing the row indices of the sparse\n              COO matrix.\n  @param[in]\n  coo_col_ind array of \\p nnz elements containing the column indices of the sparse\n              COO matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p coo_val, \\p coo_row_ind, \\p coo_col_ind, \\p temp_buffer 
or \\p data_status  pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_coo(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_val: *const f32,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_coo(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_val: *const f64,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_coo(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_val: *const rocsparse_float_complex,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_coo(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        coo_val: *const rocsparse_double_complex,
+        coo_row_ind: *const rocsparse_int,
+        coo_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_csc_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_csc\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSC matrix.\n  @param[in]\n  n           number of columns of the sparse CSC matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSC matrix.\n  @param[in]\n  csc_val     array of \\p nnz elements of the sparse CSC matrix.\n  @param[in]\n  csc_col_ptr array of \\p n+1 elements that point to the start of every column of the\n              sparse CSC matrix.\n  @param[in]\n  csc_row_ind array of \\p nnz elements containing the row indices of the sparse\n              CSC matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_csc(), rocsparse_dcheck_matrix_csc(),\n              rocsparse_ccheck_matrix_csc() and rocsparse_zcheck_matrix_csc().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval 
rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p csc_val, \\p csc_col_ptr, \\p csc_row_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_csc_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csc_val: *const f32,
+        csc_col_ptr: *const rocsparse_int,
+        csc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_csc_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csc_val: *const f64,
+        csc_col_ptr: *const rocsparse_int,
+        csc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_csc_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csc_val: *const rocsparse_float_complex,
+        csc_col_ptr: *const rocsparse_int,
+        csc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_csc_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csc_val: *const rocsparse_double_complex,
+        csc_col_ptr: *const rocsparse_int,
+        csc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_csc checks if the input CSC matrix is valid.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSC matrix.\n  @param[in]\n  n           number of columns of the sparse CSC matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSC matrix.\n  @param[in]\n  csc_val     array of \\p nnz elements of the sparse CSC matrix.\n  @param[in]\n  csc_col_ptr array of \\p n+1 elements that point to the start of every column of the\n              sparse CSC matrix.\n  @param[in]\n  csc_row_ind array of \\p nnz elements containing the row indices of the sparse\n              CSC matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p csc_val, \\p csc_col_ptr, \\p csc_row_ind, \\p 
temp_buffer or \\p data_status pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_csc(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csc_val: *const f32,
+        csc_col_ptr: *const rocsparse_int,
+        csc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_csc(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csc_val: *const f64,
+        csc_col_ptr: *const rocsparse_int,
+        csc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_csc(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csc_val: *const rocsparse_float_complex,
+        csc_col_ptr: *const rocsparse_int,
+        csc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_csc(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csc_val: *const rocsparse_double_complex,
+        csc_col_ptr: *const rocsparse_int,
+        csc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_csr_buffer_size computes the required buffer size needed when calling \\p rocsparse_check_matrix_csr\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_csr(), rocsparse_dcheck_matrix_csr(),\n              rocsparse_ccheck_matrix_csr() and rocsparse_zcheck_matrix_csr().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval 
rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p csr_val, \\p csr_row_ptr, \\p csr_col_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_csr_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_csr_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_val: *const f64,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_csr_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_val: *const rocsparse_float_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_csr_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_val: *const rocsparse_double_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_csr checks if the input CSR matrix is valid.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse CSR matrix.\n  @param[in]\n  n           number of columns of the sparse CSR matrix.\n  @param[in]\n  nnz         number of non-zero entries of the sparse CSR matrix.\n  @param[in]\n  csr_val     array of \\p nnz elements of the sparse CSR matrix.\n  @param[in]\n  csr_row_ptr array of \\p m+1 elements that point to the start of every row of the\n              sparse CSR matrix.\n  @param[in]\n  csr_col_ind array of \\p nnz elements containing the column indices of the sparse\n              CSR matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m \\p n or \\p nnz is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p csr_val, \\p csr_row_ptr, \\p csr_col_ind, \\p 
temp_buffer or \\p data_status pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_val: *const f32,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_val: *const f64,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_val: *const rocsparse_float_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_csr(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        nnz: rocsparse_int,
+        csr_val: *const rocsparse_double_complex,
+        csr_row_ptr: *const rocsparse_int,
+        csr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_ell_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_ell\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse ELL matrix.\n  @param[in]\n  n           number of columns of the sparse ELL matrix.\n  @param[in]\n  ell_width   number of non-zero elements per row of the sparse ELL matrix.\n  @param[in]\n  ell_val     array that contains the elements of the sparse ELL matrix. Padded\n              elements should be zero.\n  @param[in]\n  ell_col_ind array that contains the column indices of the sparse ELL matrix.\n              Padded column indices should be -1.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_ell(), rocsparse_dcheck_matrix_ell(),\n              rocsparse_ccheck_matrix_ell() and rocsparse_zcheck_matrix_ell().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m, \\p n or \\p ell_width 
is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p ell_val, \\p ell_col_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_ell_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        ell_width: rocsparse_int,
+        ell_val: *const f32,
+        ell_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_ell_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        ell_width: rocsparse_int,
+        ell_val: *const f64,
+        ell_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_ell_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        ell_width: rocsparse_int,
+        ell_val: *const rocsparse_float_complex,
+        ell_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_ell_buffer_size(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        ell_width: rocsparse_int,
+        ell_val: *const rocsparse_double_complex,
+        ell_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_ell checks if the input ELL matrix is valid.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  m           number of rows of the sparse ELL matrix.\n  @param[in]\n  n           number of columns of the sparse ELL matrix.\n  @param[in]\n  ell_width   number of non-zero elements per row of the sparse ELL matrix.\n  @param[in]\n  ell_val     array that contains the elements of the sparse ELL matrix. Padded\n              elements should be zero.\n  @param[in]\n  ell_col_ind array that contains the column indices of the sparse ELL matrix.\n              Padded column indices should be -1.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p m, \\p n or \\p ell_width is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p ell_val, \\p ell_col_ind, \\p temp_buffer or \\p data_status pointer\n          is 
invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_ell(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        ell_width: rocsparse_int,
+        ell_val: *const f32,
+        ell_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_ell(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        ell_width: rocsparse_int,
+        ell_val: *const f64,
+        ell_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_ell(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        ell_width: rocsparse_int,
+        ell_val: *const rocsparse_float_complex,
+        ell_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_ell(
+        handle: rocsparse_handle,
+        m: rocsparse_int,
+        n: rocsparse_int,
+        ell_width: rocsparse_int,
+        ell_val: *const rocsparse_double_complex,
+        ell_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_gebsc_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_gebsc\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir          matrix storage of GEBSC blocks.\n  @param[in]\n  mb           number of block rows of the sparse GEBSC matrix.\n  @param[in]\n  nb           number of block columns of the sparse GEBSC matrix.\n  @param[in]\n  nnzb         number of non-zero blocks of the sparse GEBSC matrix.\n  @param[in]\n  row_block_dim row block dimension of the sparse GEBSC matrix.\n  @param[in]\n  col_block_dim column block dimension of the sparse GEBSC matrix.\n  @param[in]\n  bsc_val     array of \\p nnzb elements of the sparse GEBSC matrix.\n  @param[in]\n  bsc_col_ptr array of \\p nb+1 elements that point to the start of every column of the\n              sparse GEBSC matrix.\n  @param[in]\n  bsc_row_ind array of \\p nnzb elements containing the row indices of the sparse\n              GEBSC matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_gebsc(), rocsparse_dcheck_matrix_gebsc(),\n              rocsparse_ccheck_matrix_gebsc() and rocsparse_zcheck_matrix_gebsc().\n\n  \\retval 
rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p dir or \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb, \\p row_block_dim or \\p col_block_dim is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p bsc_val, \\p bsc_col_ptr, \\p bsc_row_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_gebsc_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *const f32,
+        bsc_col_ptr: *const rocsparse_int,
+        bsc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_gebsc_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *const f64,
+        bsc_col_ptr: *const rocsparse_int,
+        bsc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_gebsc_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *const rocsparse_float_complex,
+        bsc_col_ptr: *const rocsparse_int,
+        bsc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_gebsc_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *const rocsparse_double_complex,
+        bsc_col_ptr: *const rocsparse_int,
+        bsc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_gebsc checks if the input GEBSC matrix is valid.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir          matrix storage of GEBSC blocks.\n  @param[in]\n  mb           number of block rows of the sparse GEBSC matrix.\n  @param[in]\n  nb           number of block columns of the sparse GEBSC matrix.\n  @param[in]\n  nnzb         number of non-zero blocks of the sparse GEBSC matrix.\n  @param[in]\n  row_block_dim row block dimension of the sparse GEBSC matrix.\n  @param[in]\n  col_block_dim column block dimension of the sparse GEBSC matrix.\n  @param[in]\n  bsc_val     array of \\p nnzb elements of the sparse GEBSC matrix.\n  @param[in]\n  bsc_col_ptr array of \\p nb+1 elements that point to the start of every column of the\n              sparse GEBSC matrix.\n  @param[in]\n  bsc_row_ind array of \\p nnzb elements containing the row indices of the sparse\n              GEBSC matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval 
rocsparse_status_invalid_value \\p dir or \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb, \\p row_block_dim or \\p col_block_dim is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p bsc_val, \\p bsc_col_ptr, \\p bsc_row_ind, \\p temp_buffer or \\p data_status pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_gebsc(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *const f32,
+        bsc_col_ptr: *const rocsparse_int,
+        bsc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_gebsc(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *const f64,
+        bsc_col_ptr: *const rocsparse_int,
+        bsc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_gebsc(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *const rocsparse_float_complex,
+        bsc_col_ptr: *const rocsparse_int,
+        bsc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_gebsc(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsc_val: *const rocsparse_double_complex,
+        bsc_col_ptr: *const rocsparse_int,
+        bsc_row_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_gebsr_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_gebsr\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir          matrix storage of GEBSR blocks.\n  @param[in]\n  mb           number of block rows of the sparse GEBSR matrix.\n  @param[in]\n  nb           number of block columns of the sparse GEBSR matrix.\n  @param[in]\n  nnzb         number of non-zero blocks of the sparse GEBSR matrix.\n  @param[in]\n  row_block_dim row block dimension of the sparse GEBSR matrix.\n  @param[in]\n  col_block_dim column block dimension of the sparse GEBSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb elements of the sparse GEBSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every row of the\n              sparse GEBSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the column indices of the sparse\n              GEBSR matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_scheck_matrix_gebsr(), rocsparse_dcheck_matrix_gebsr(),\n              rocsparse_ccheck_matrix_gebsr() and rocsparse_zcheck_matrix_gebsr().\n\n  \\retval 
rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p dir or \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb, \\p row_block_dim or \\p col_block_dim is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p bsr_val, \\p bsr_row_ptr, \\p bsr_col_ind or \\p buffer_size pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_gebsr_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_gebsr_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_gebsr_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsr_val: *const rocsparse_float_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_gebsr_buffer_size(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsr_val: *const rocsparse_double_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_gebsr checks if the input GEBSR matrix is valid.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  dir          matrix storage of GEBSR blocks.\n  @param[in]\n  mb           number of block rows of the sparse GEBSR matrix.\n  @param[in]\n  nb           number of block columns of the sparse GEBSR matrix.\n  @param[in]\n  nnzb         number of non-zero blocks of the sparse GEBSR matrix.\n  @param[in]\n  row_block_dim row block dimension of the sparse GEBSR matrix.\n  @param[in]\n  col_block_dim column block dimension of the sparse GEBSR matrix.\n  @param[in]\n  bsr_val     array of \\p nnzb elements of the sparse GEBSR matrix.\n  @param[in]\n  bsr_row_ptr array of \\p mb+1 elements that point to the start of every row of the\n              sparse GEBSR matrix.\n  @param[in]\n  bsr_col_ind array of \\p nnzb elements containing the column indices of the sparse\n              GEBSR matrix.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval 
rocsparse_status_invalid_value \\p dir or \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_size \\p mb, \\p nb, \\p nnzb, \\p row_block_dim or \\p col_block_dim is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p bsr_val, \\p bsr_row_ptr, \\p bsr_col_ind, \\p temp_buffer or \\p data_status pointer\n          is invalid.\n/\n/**@{"]
+    pub fn rocsparse_scheck_matrix_gebsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsr_val: *const f32,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_dcheck_matrix_gebsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsr_val: *const f64,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_ccheck_matrix_gebsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsr_val: *const rocsparse_float_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocsparse_zcheck_matrix_gebsr(
+        handle: rocsparse_handle,
+        dir: rocsparse_direction,
+        mb: rocsparse_int,
+        nb: rocsparse_int,
+        nnzb: rocsparse_int,
+        row_block_dim: rocsparse_int,
+        col_block_dim: rocsparse_int,
+        bsr_val: *const rocsparse_double_complex,
+        bsr_row_ptr: *const rocsparse_int,
+        bsr_col_ind: *const rocsparse_int,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_hyb_buffer_size computes the required buffer size needed when\n  calling \\p rocsparse_check_matrix_hyb\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  hyb         matrix in HYB storage format.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  buffer_size number of bytes of the temporary storage buffer required by\n              rocsparse_check_matrix_hyb().\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p hyb or \\p buffer_size pointer is invalid."]
+    pub fn rocsparse_check_matrix_hyb_buffer_size(
+        handle: rocsparse_handle,
+        hyb: rocsparse_hyb_mat,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        buffer_size: *mut usize,
+    ) -> rocsparse_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\ingroup utility_module\n  \\brief Check matrix to see if it is valid.\n\n  \\details\n  \\p rocsparse_check_matrix_hyb checks if the input HYB matrix is valid.\n\n  \\note\n  This routine does not support execution in a hipGraph context.\n\n  @param[in]\n  handle      handle to the rocsparse library context queue.\n  @param[in]\n  hyb         matrix in HYB storage format.\n  @param[in]\n  idx_base    \\ref rocsparse_index_base_zero or \\ref rocsparse_index_base_one.\n  @param[in]\n  matrix_type \\ref rocsparse_matrix_type_general, \\ref rocsparse_matrix_type_symmetric,\n              \\ref rocsparse_matrix_type_hermitian or \\ref rocsparse_matrix_type_triangular.\n  @param[in]\n  uplo        \\ref rocsparse_fill_mode_lower or \\ref rocsparse_fill_mode_upper.\n  @param[in]\n  storage     \\ref rocsparse_storage_mode_sorted or \\ref rocsparse_storage_mode_unsorted.\n  @param[out]\n  data_status modified to indicate the status of the data\n  @param[in]\n  temp_buffer temporary storage buffer allocated by the user.\n\n  \\retval rocsparse_status_success the operation completed successfully.\n  \\retval rocsparse_status_invalid_handle the library context was not initialized.\n  \\retval rocsparse_status_invalid_value \\p idx_base or \\p matrix_type or \\p uplo or \\p storage is invalid.\n  \\retval rocsparse_status_invalid_pointer \\p hyb or \\p data_status pointer is invalid."]
+    pub fn rocsparse_check_matrix_hyb(
+        handle: rocsparse_handle,
+        hyb: rocsparse_hyb_mat,
+        idx_base: rocsparse_index_base,
+        matrix_type: rocsparse_matrix_type,
+        uplo: rocsparse_fill_mode,
+        storage: rocsparse_storage_mode,
+        data_status: *mut rocsparse_data_status,
+        temp_buffer: *mut ::std::os::raw::c_void,
+    ) -> rocsparse_status;
+}
diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs
index b7dd0f5..844e456 100644
--- a/zluda/src/impl/device.rs
+++ b/zluda/src/impl/device.rs
@@ -176,7 +176,7 @@ pub(crate) unsafe fn get_attribute(
         | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR => {
             // in practical terms max group size = max blocks * warp size
             let mut prop = mem::zeroed();
-            hip_call_cuda! { hipGetDeviceProperties(&mut prop, dev) };
+            hip_call_cuda! { hipGetDevicePropertiesR0600(&mut prop, dev) };
             *pi = (prop.maxThreadsPerBlock / 2) / prop.warpSize;
             return Ok(());
         }
@@ -260,13 +260,13 @@ pub(crate) unsafe fn get_attribute(
         // linear sizes
         CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH => {
             let mut prop = mem::zeroed();
-            hip_call_cuda! { hipGetDeviceProperties(&mut prop, dev) };
+            hip_call_cuda! { hipGetDevicePropertiesR0600(&mut prop, dev) };
             *pi = prop.maxTexture1DLinear;
             return Ok(());
         }
         CUdevice_attribute::CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID => {
             let mut prop = mem::zeroed();
-            hip_call_cuda! { hipGetDeviceProperties(&mut prop, dev) };
+            hip_call_cuda! { hipGetDevicePropertiesR0600(&mut prop, dev) };
             *pi = prop.pciDomainID;
             return Ok(());
         }
@@ -455,7 +455,7 @@ pub(crate) unsafe fn get_properties(
         return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
     }
     let mut hip_props = mem::zeroed();
-    hip_call_cuda! { hipGetDeviceProperties(&mut hip_props, dev) };
+    hip_call_cuda! { hipGetDevicePropertiesR0600(&mut hip_props, dev) };
     (*prop).maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
     (*prop).maxThreadsDim = hip_props.maxThreadsDim;
     (*prop).maxGridSize = hip_props.maxGridSize;
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs
index 73c6efa..d892928 100644
--- a/zluda/src/impl/mod.rs
+++ b/zluda/src/impl/mod.rs
@@ -299,46 +299,30 @@ macro_rules! try_downcast {
 
 #[allow(non_snake_case)]
 pub(crate) fn memcpy3d_from_cuda(this: &CUDA_MEMCPY3D) -> Result<HIP_MEMCPY3D, CUresult> {
-    // TODO: remove the casts when HIP fixes it
-    let srcXInBytes = try_downcast!(this.srcXInBytes, usize => u32);
-    let srcY = try_downcast!(this.srcY, usize => u32);
-    let srcZ = try_downcast!(this.srcZ, usize => u32);
-    let srcLOD = try_downcast!(this.srcLOD, usize => u32);
-    let srcPitch = try_downcast!(this.srcPitch, usize => u32);
-    let srcHeight = try_downcast!(this.srcHeight, usize => u32);
-    let dstXInBytes = try_downcast!(this.dstXInBytes, usize => u32);
-    let dstY = try_downcast!(this.dstY, usize => u32);
-    let dstZ = try_downcast!(this.dstZ, usize => u32);
-    let dstLOD = try_downcast!(this.dstLOD, usize => u32);
-    let dstPitch = try_downcast!(this.dstPitch, usize => u32);
-    let dstHeight = try_downcast!(this.dstHeight, usize => u32);
-    let WidthInBytes = try_downcast!(this.WidthInBytes, usize => u32);
-    let Height = try_downcast!(this.Height, usize => u32);
-    let Depth = try_downcast!(this.Depth, usize => u32);
     Ok(HIP_MEMCPY3D {
-        srcXInBytes,
-        srcY,
-        srcZ,
-        srcLOD,
+        srcXInBytes: this.srcXInBytes,
+        srcY: this.srcY,
+        srcZ: this.srcZ,
+        srcLOD: this.srcLOD,
         srcMemoryType: memory_type_from_cuda(this.srcMemoryType),
         srcHost: this.srcHost,
         srcDevice: FromCuda::from_cuda(this.srcDevice),
         srcArray: hipfix::array::get(this.srcArray),
-        srcPitch,
-        srcHeight,
-        dstXInBytes,
-        dstY,
-        dstZ,
-        dstLOD,
+        srcPitch: this.srcPitch,
+        srcHeight: this.srcHeight,
+        dstXInBytes: this.dstXInBytes,
+        dstY: this.dstY,
+        dstZ: this.dstZ,
+        dstLOD: this.dstLOD,
         dstMemoryType: memory_type_from_cuda(this.dstMemoryType),
         dstHost: this.dstHost,
         dstDevice: FromCuda::from_cuda(this.dstDevice),
         dstArray: hipfix::array::get(this.dstArray),
-        dstPitch,
-        dstHeight,
-        WidthInBytes,
-        Height,
-        Depth,
+        dstPitch: this.dstPitch,
+        dstHeight: this.dstHeight,
+        WidthInBytes: this.WidthInBytes,
+        Height: this.Height,
+        Depth: this.Depth,
     })
 }
 
@@ -348,7 +332,7 @@ pub(crate) fn memory_type_from_cuda(this: CUmemorytype) -> hipMemoryType {
         CUmemorytype::CU_MEMORYTYPE_DEVICE => hipMemoryType::hipMemoryTypeDevice,
         CUmemorytype::CU_MEMORYTYPE_ARRAY => hipMemoryType::hipMemoryTypeArray,
         CUmemorytype::CU_MEMORYTYPE_UNIFIED => hipMemoryType::hipMemoryTypeUnified,
-        CUmemorytype(val) => hipMemoryType(val - 1),
+        _ => panic!("[ZLUDA] Unknown memory type: {}", this.0),
     }
 }
 
diff --git a/zluda/src/impl/pointer.rs b/zluda/src/impl/pointer.rs
index caeacf4..22a3a1f 100644
--- a/zluda/src/impl/pointer.rs
+++ b/zluda/src/impl/pointer.rs
@@ -25,7 +25,7 @@ pub(crate) unsafe fn get_attribute(
     match attribute {
         hipPointer_attribute::HIP_POINTER_ATTRIBUTE_MEMORY_TYPE => {
             *(data as *mut _) =
-                memory_type(attribs.__bindgen_anon_1.memoryType).map_err(IntoCuda::into_cuda)?;
+                memory_type(attribs.type_).map_err(IntoCuda::into_cuda)?;
             Ok(())
         }
         hipPointer_attribute::HIP_POINTER_ATTRIBUTE_DEVICE_POINTER => {
diff --git a/zluda_blas/src/lib.rs b/zluda_blas/src/lib.rs
index e18a94c..fd3d7d9 100644
--- a/zluda_blas/src/lib.rs
+++ b/zluda_blas/src/lib.rs
@@ -868,7 +868,7 @@ unsafe fn dtrmm_v2(
     let uplo = to_fill(uplo);
     let transa = op_from_cuda(transa);
     let diag = to_diag(diag);
-    to_cuda(rocblas_dtrmm_outofplace(
+    to_cuda(rocblas_dtrmm(
         handle.cast(),
         side,
         uplo,
@@ -952,7 +952,7 @@ unsafe fn get_pointer_mode(
 unsafe fn set_pointer_mode(handle: cublasHandle_t, mode: cublasPointerMode_t) -> cublasStatus_t {
     to_cuda(rocblas_set_pointer_mode(
         handle.cast(),
-        rocblas_pointer_mode_(mode.0),
+        rocblas_pointer_mode_(mode.0 as _),
     ))
 }
 
@@ -1207,7 +1207,7 @@ unsafe fn gemm_batched_ex(
         compute_type,
         algo,
         0,
-        rocblas_gemm_flags::rocblas_gemm_flags_none.0,
+        0,
     ))
 }
 
@@ -1272,6 +1272,6 @@ unsafe fn gemm_strided_batched_ex(
         compute_type,
         algo,
         0,
-        rocblas_gemm_flags::rocblas_gemm_flags_none.0,
+        0,
     ))
 }
diff --git a/zluda_sparse/src/lib.rs b/zluda_sparse/src/lib.rs
index a103bde..290a616 100644
--- a/zluda_sparse/src/lib.rs
+++ b/zluda_sparse/src/lib.rs
@@ -502,6 +502,7 @@ unsafe fn spmv(
         vec_y.cast(),
         compute_type,
         alg,
+        rocsparse_spmv_stage::rocsparse_spmv_stage_compute,
         &mut size,
         external_buffer,
     ))
@@ -532,6 +533,7 @@ unsafe fn spmv_buffersize(
         vec_y.cast(),
         compute_type,
         alg,
+        rocsparse_spmv_stage::rocsparse_spmv_stage_buffer_size,
         buffer_size,
         ptr::null_mut(),
     ))