diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 5ea5ccbddf6..5b7be2b1034 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -25,4 +25,4 @@ If this is a **feature request**, show what you expect to happen if the feature
 
 
 #### Current Version:
-v4.0.0
\ No newline at end of file
+v4.0.1
\ No newline at end of file
diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt
index 2fd9e7f4a72..c260bbc3be0 100644
--- a/RELEASE-NOTES.txt
+++ b/RELEASE-NOTES.txt
@@ -1,7 +1,15 @@
 Stan Math Library Release Notes
 
 ======================================================================
-v4.0.0 (26 January 2020)
+v4.0.1 (17 February 2021)
+======================================================================
+
+- Fixed issue with `cholesky_decompose` not propagating derivatives correctly.
+- Fixed OpenCL implementations of distributions mostly not working with row vectors.
+- Fixed handling of input expressions with reduce_sum.
+
+======================================================================
+v4.0.0 (26 January 2021)
 ======================================================================
 
 New functions:  
diff --git a/doxygen/doxygen.cfg b/doxygen/doxygen.cfg
index c6dbd7f9101..e678ffde17d 100644
--- a/doxygen/doxygen.cfg
+++ b/doxygen/doxygen.cfg
@@ -38,7 +38,7 @@ PROJECT_NAME           = "Stan Math Library"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 4.0.0
+PROJECT_NUMBER         = 4.0.1
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/stan/math/fwd.hpp b/stan/math/fwd.hpp
index 36fa9e0e794..7f30955b0f2 100644
--- a/stan/math/fwd.hpp
+++ b/stan/math/fwd.hpp
@@ -3,6 +3,10 @@
 
 #include <stan/math/prim/fun/Eigen.hpp>
 
+#ifdef STAN_OPENCL
+#include <stan/math/opencl/prim.hpp>
+#endif
+
 #include <stan/math/fwd/core.hpp>
 #include <stan/math/fwd/meta.hpp>
 #include <stan/math/fwd/fun.hpp>
diff --git a/stan/math/mix.hpp b/stan/math/mix.hpp
index d60496114fa..a1027e4d645 100644
--- a/stan/math/mix.hpp
+++ b/stan/math/mix.hpp
@@ -1,6 +1,10 @@
 #ifndef STAN_MATH_MIX_HPP
 #define STAN_MATH_MIX_HPP
 
+#ifdef STAN_OPENCL
+#include <stan/math/opencl/rev.hpp>
+#endif
+
 #include <stan/math/mix/meta.hpp>
 #include <stan/math/mix/fun.hpp>
 #include <stan/math/mix/functor.hpp>
diff --git a/stan/math/mix/mat.hpp b/stan/math/mix/mat.hpp
index 562c38e9527..612542ef0aa 100644
--- a/stan/math/mix/mat.hpp
+++ b/stan/math/mix/mat.hpp
@@ -1,2 +1 @@
 #include "fun.hpp"
-
diff --git a/stan/math/opencl/kernel_generator.hpp b/stan/math/opencl/kernel_generator.hpp
index dc54b9b1282..18eaa06ebb7 100644
--- a/stan/math/opencl/kernel_generator.hpp
+++ b/stan/math/opencl/kernel_generator.hpp
@@ -109,6 +109,7 @@
 #include <stan/math/opencl/kernel_generator/name_generator.hpp>
 #include <stan/math/opencl/kernel_generator/type_str.hpp>
 
+#include <stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp>
 #include <stan/math/opencl/kernel_generator/load.hpp>
 #include <stan/math/opencl/kernel_generator/scalar.hpp>
 #include <stan/math/opencl/kernel_generator/constant.hpp>
diff --git a/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp b/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp
new file mode 100644
index 00000000000..cc995636933
--- /dev/null
+++ b/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp
@@ -0,0 +1,334 @@
+#ifndef STAN_MATH_OPENCL_KERNEL_GENERATOR_AS_COLUMN_VECTOR_OR_SCALAR_HPP
+#define STAN_MATH_OPENCL_KERNEL_GENERATOR_AS_COLUMN_VECTOR_OR_SCALAR_HPP
+#ifdef STAN_OPENCL
+
+#include <stan/math/opencl/matrix_cl_view.hpp>
+#include <stan/math/opencl/kernel_generator/type_str.hpp>
+#include <stan/math/opencl/kernel_generator/name_generator.hpp>
+#include <stan/math/opencl/kernel_generator/operation_cl_lhs.hpp>
+#include <stan/math/opencl/kernel_generator/as_operation_cl.hpp>
+#include <stan/math/opencl/kernel_generator/constant.hpp>
+#include <stan/math/prim/err/check_vector.hpp>
+#include <stan/math/prim/meta.hpp>
+#include <algorithm>
+#include <map>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+namespace stan {
+namespace math {
+
+/** \addtogroup opencl_kernel_generator
+ *  @{
+ */
+
+/**
+ * Represents as_column_vector_or_scalar of a row or column vector in
+ * kernel generator expressions.
+ * @tparam T type of the argument expression (a row or a column vector); it
+ * must provide a \c Scalar member type
+ */
+template <typename T>
+class as_column_vector_or_scalar_
+    : public operation_cl_lhs<as_column_vector_or_scalar_<T>,
+                              typename std::remove_reference_t<T>::Scalar, T> {
+ public:
+  using Scalar = typename std::remove_reference_t<T>::Scalar;
+  using base = operation_cl_lhs<as_column_vector_or_scalar_<T>, Scalar, T>;
+  using base::var_name_;
+  using base::operator=;
+
+  /** Constructor. The stored argument is checked, as \c a may be moved from.
+   * @param a expression (must be row or column vector)
+   */
+  explicit as_column_vector_or_scalar_(T&& a) : base(std::forward<T>(a)) {
+    check_vector("as_column_vector_or_scalar", "a",
+                 this->template get_arg<0>());
+  }
+
+  /**
+   * Creates a deep copy of this expression (the argument is deep-copied too).
+   * @return copy of \c *this
+   */
+  inline auto deep_copy() const {
+    auto&& arg_copy = this->template get_arg<0>().deep_copy();
+    return as_column_vector_or_scalar_<
+        std::remove_reference_t<decltype(arg_copy)>>{std::move(arg_copy)};
+  }
+
+  /**
+   * Generates kernel code for this expression and its (re-indexed) argument.
+   * @param[in,out] generated map from (pointer to) already generated local
+   * operations to variable names
+   * @param[in,out] generated_all map from (pointer to) already generated all
+   * operations to variable names
+   * @param name_gen name generator for this kernel
+   * @param row_index_name row index variable name
+   * @param col_index_name column index variable name
+   * @param view_handled whether caller already handled matrix view
+   * @return code for this and nested expressions; empty if already generated
+   */
+  inline kernel_parts get_kernel_parts(
+      std::map<const void*, const char*>& generated,
+      std::map<const void*, const char*>& generated_all,
+      name_generator& name_gen, const std::string& row_index_name,
+      const std::string& col_index_name, bool view_handled) const {
+    kernel_parts res{};
+    if (generated.count(this) == 0) {
+      this->var_name_ = name_gen.generate();
+      generated[this] = "";
+      std::string row_index_name_arg = row_index_name;
+      std::string col_index_name_arg = col_index_name;
+      modify_argument_indices(row_index_name_arg, col_index_name_arg);
+      std::map<const void*, const char*> generated2;
+      res = this->template get_arg<0>().get_kernel_parts(
+          generated2, generated_all, name_gen, row_index_name_arg,
+          col_index_name_arg, view_handled);
+      kernel_parts my
+          = this->generate(row_index_name, col_index_name, view_handled,
+                           this->template get_arg<0>().var_name_);
+      if (generated_all.count(this) == 0) {
+        generated_all[this] = "";
+      } else {
+        my.args = "";
+      }
+      res += my;
+      res.body = res.body_prefix + res.body;
+      res.body_prefix = "";
+    }
+    return res;
+  }
+
+  /**
+   * Generates kernel code for this expression.
+   * @param row_index_name row index variable name (unused)
+   * @param col_index_name column index variable name (unused)
+   * @param view_handled whether caller already handled matrix view (unused)
+   * @param var_name_arg name of the variable in kernel that holds argument to
+   * this expression
+   * @return part of kernel with code for this expression
+   */
+  inline kernel_parts generate(const std::string& row_index_name,
+                               const std::string& col_index_name,
+                               const bool view_handled,
+                               const std::string& var_name_arg) const {
+    kernel_parts res;
+    res.args = "int " + var_name_ + "_transpose, ";
+    res.body
+        = type_str<Scalar>() + " " + var_name_ + " = " + var_name_arg + ";\n";
+    return res;
+  }
+
+  /**
+   * Generates kernel code for this and nested expressions if this expression
+   * appears on the left hand side of an assignment.
+   * @param[in,out] generated map from (pointer to) already generated local
+   * operations to variable names
+   * @param[in,out] generated_all map from (pointer to) already generated all
+   * operations to variable names
+   * @param name_gen name generator for this kernel
+   * @param row_index_name row index variable name
+   * @param col_index_name column index variable name
+   * @return part of kernel with code for this and nested expressions
+   */
+  inline kernel_parts get_kernel_parts_lhs(
+      std::map<const void*, const char*>& generated,
+      std::map<const void*, const char*>& generated_all,
+      name_generator& name_gen, const std::string& row_index_name,
+      const std::string& col_index_name) const {
+    if (generated.count(this) == 0) {
+      generated[this] = "";
+      this->var_name_ = name_gen.generate();
+    }
+    std::string row_index_name_arg = row_index_name;
+    std::string col_index_name_arg = col_index_name;
+    modify_argument_indices(row_index_name_arg, col_index_name_arg);
+    std::map<const void*, const char*> generated2;
+    kernel_parts res = this->template get_arg<0>().get_kernel_parts_lhs(
+        generated2, generated_all, name_gen, row_index_name_arg,
+        col_index_name_arg);
+    res += this->derived().generate_lhs(row_index_name, col_index_name,
+                                        this->template get_arg<0>().var_name_);
+    if (generated_all.count(this) == 0) {
+      generated_all[this] = "";
+    } else {
+      res.args = "";
+    }
+    return res;
+  }
+
+  /**
+   * Generates kernel code for this expression if it appears on the left hand
+   * side of an assignment. Only the transpose flag kernel argument is added.
+   * @param i row index variable name (unused)
+   * @param j column index variable name (unused)
+   * @param var_name_arg name of the variable in kernel that holds argument to
+   * this expression (unused)
+   * @return part of kernel with code for this expression
+   */
+  inline kernel_parts generate_lhs(const std::string& i, const std::string& j,
+                                   const std::string& var_name_arg) const {
+    kernel_parts res;
+    res.args = "int " + var_name_ + "_transpose, ";
+    return res;
+  }
+
+  /**
+   * Sets kernel arguments for nested expressions and for the transpose flag.
+   * @param[in,out] generated map from (pointer to) already generated local
+   * operations to variable names
+   * @param[in,out] generated_all map from (pointer to) already generated all
+   * operations to variable names
+   * @param kernel kernel to set arguments on
+   * @param[in,out] arg_num consecutive number of the first argument to set.
+   * This is incremented for each argument set by this function.
+   */
+  inline void set_args(std::map<const void*, const char*>& generated,
+                       std::map<const void*, const char*>& generated_all,
+                       cl::Kernel& kernel, int& arg_num) const {
+    if (generated.count(this) == 0) {
+      generated[this] = "";
+      std::map<const void*, const char*> generated2;
+      this->template get_arg<0>().set_args(generated2, generated_all, kernel,
+                                           arg_num);
+      if (generated_all.count(this) == 0) {
+        generated_all[this] = "";
+        kernel.setArg(arg_num++,
+                      static_cast<int>(this->template get_arg<0>().rows()
+                                       < this->template get_arg<0>().cols()));
+      }
+    }
+  }
+
+  /**
+   * Rewrites \c row_index_name and \c col_index_name into kernel expressions
+   * that swap the two indices when this operation's transpose flag is set.
+   * @param[in, out] row_index_name row index
+   * @param[in, out] col_index_name column index
+   */
+  inline void modify_argument_indices(std::string& row_index_name,
+                                      std::string& col_index_name) const {
+    std::string row_index_name2 = "(" + var_name_ + "_transpose ? "
+                                  + col_index_name + " : " + row_index_name
+                                  + ")";
+    col_index_name = "(" + var_name_ + "_transpose ? " + row_index_name + " : "
+                     + col_index_name + ")";
+    row_index_name = std::move(row_index_name2);
+  }
+
+  /**
+   * Number of rows of a matrix that would be the result of evaluating this
+   * expression.
+   * @return number of rows (the larger of the argument's two dimensions)
+   */
+  inline int rows() const {
+    return std::max(this->template get_arg<0>().rows(),
+                    this->template get_arg<0>().cols());
+  }
+
+  /**
+   * Number of columns of a matrix that would be the result of evaluating this
+   * expression.
+   * @return number of columns (the smaller of the argument's two dimensions)
+   */
+  inline int cols() const {
+    return std::min(this->template get_arg<0>().rows(),
+                    this->template get_arg<0>().cols());
+  }
+
+  /**
+   * Sets the view of the underlying matrix depending on which of its parts are
+   * written to.
+   * @param bottom_diagonal Index of the bottom sub- or super- diagonal
+   * written with nonzero elements.
+   * @param top_diagonal Index of the top sub- or super- diagonal written with
+   * nonzero elements.
+   * @param bottom_zero_diagonal Index of the bottom sub- or super- diagonal
+   * written with zeros if it is more extreme than \c bottom_diagonal. Otherwise
+   * it should be set to equal value as \c bottom_diagonal.
+   * @param top_zero_diagonal Index of the top sub- or super- diagonal written
+   * with zeros if it is more extreme than \c top_diagonal. Otherwise it should
+   * be set to equal value as \c top_diagonal.
+   */
+  inline void set_view(int bottom_diagonal, int top_diagonal,
+                       int bottom_zero_diagonal, int top_zero_diagonal) const {
+    auto& arg = this->template get_arg<0>();
+    if (arg.rows() >= arg.cols()) {
+      arg.set_view(bottom_diagonal, top_diagonal, bottom_zero_diagonal,
+                   top_zero_diagonal);
+    } else {
+      arg.set_view(top_diagonal, bottom_diagonal, top_zero_diagonal,
+                   bottom_zero_diagonal);
+    }
+  }
+
+  /**
+   * Determines extreme sub- and superdiagonals (transposed for a row vector).
+   * @return pair of indices - bottom and top diagonal
+   */
+  inline std::pair<int, int> extreme_diagonals() const {
+    auto& arg = this->template get_arg<0>();
+    std::pair<int, int> arg_diags = arg.extreme_diagonals();
+    if (arg.rows() >= arg.cols()) {
+      return arg_diags;
+    } else {
+      return {-arg_diags.second, -arg_diags.first};
+    }
+  }
+
+  /**
+   * Checks if desired dimensions match dimensions of the argument vector.
+   * @param rows desired number of rows
+   * @param cols desired number of columns
+   * @throws std::invalid_argument desired dimensions do not match dimensions
+   * of the argument vector.
+   */
+  inline void check_assign_dimensions(int rows, int cols) const {
+    static const char* function
+        = "as_column_vector_or_scalar_::check_assign_dimensions()";
+    // a dummy expression of the requested size lets check_vector do the check
+    check_vector(function, "expression assigned to as_column_vector_or_scalar",
+                 constant(0, rows, cols));
+    auto& arg = this->template get_arg<0>();
+    int arg_rows = arg.rows();
+    int arg_cols = arg.cols();
+    if (arg_rows >= arg_cols) {
+      // argument is used as is: dimensions must match directly
+      check_size_match(function, "Rows of ", "check_assign_dimensions argument",
+                       arg_rows, "rows of ", "expression", rows);
+      check_size_match(function, "Columns of ",
+                       "check_assign_dimensions argument", arg_cols,
+                       "columns of ", "expression", cols);
+    } else {
+      // argument is accessed transposed: compare against swapped dimensions
+      check_size_match(function, "Columns of ",
+                       "check_assign_dimensions argument", arg_cols, "rows of ",
+                       "expression", rows);
+      check_size_match(function, "Rows of ", "check_assign_dimensions argument",
+                       arg_rows, "columns of ", "expression", cols);
+    }
+  }
+};
+
+/**
+ * as_column_vector_or_scalar of a kernel generator expression. The argument is
+ * converted to an operation and deep-copied before being wrapped.
+ * @tparam T type of argument
+ * @param a input argument (must be a row or a column vector)
+ * @return as_column_vector_or_scalar of given expression
+ */
+template <typename T,
+          require_all_kernel_expressions_and_none_scalar_t<T>* = nullptr>
+inline auto as_column_vector_or_scalar(T&& a) {
+  auto&& a_operation = as_operation_cl(std::forward<T>(a)).deep_copy();
+  return as_column_vector_or_scalar_<
+      std::remove_reference_t<decltype(a_operation)>>(std::move(a_operation));
+}
+/** @}*/
+}  // namespace math
+}  // namespace stan
+
+#endif
+#endif
diff --git a/stan/math/opencl/prim.hpp b/stan/math/opencl/prim.hpp
index e92db9f641e..411c27444b9 100644
--- a/stan/math/opencl/prim.hpp
+++ b/stan/math/opencl/prim.hpp
@@ -83,6 +83,7 @@
 #include <stan/math/opencl/opencl_context.hpp>
 #include <stan/math/opencl/matrix_cl.hpp>
 
+#include <stan/math/opencl/scalar_type.hpp>
 #include <stan/math/opencl/copy.hpp>
 #include <stan/math/opencl/cholesky_decompose.hpp>
 #include <stan/math/opencl/is_constant.hpp>
@@ -92,7 +93,6 @@
 #include <stan/math/opencl/pinned_matrix.hpp>
 #include <stan/math/opencl/plain_type.hpp>
 #include <stan/math/opencl/ref_type_for_opencl.hpp>
-#include <stan/math/opencl/scalar_type.hpp>
 #include <stan/math/opencl/to_ref_for_opencl.hpp>
 #include <stan/math/opencl/triangular_transpose.hpp>
 #include <stan/math/opencl/value_type.hpp>
diff --git a/stan/math/opencl/prim/bernoulli_logit_lpmf.hpp b/stan/math/opencl/prim/bernoulli_logit_lpmf.hpp
index 60952f3dcf8..8db546a1853 100644
--- a/stan/math/opencl/prim/bernoulli_logit_lpmf.hpp
+++ b/stan/math/opencl/prim/bernoulli_logit_lpmf.hpp
@@ -44,7 +44,8 @@ return_type_t<T_prob_cl> bernoulli_logit_lpmf(const T_n_cl& n,
     return 0.0;
   }
 
-  const auto& theta_val = value_of(theta);
+  const auto& theta_col = as_column_vector_or_scalar(theta);
+  const auto& theta_val = value_of(theta_col);
 
   auto check_n_bounded = check_cl(function, "n", n, "in the interval [0, 1]");
   auto n_bounded_expr = 0 <= n && n <= 1;
@@ -77,7 +78,7 @@ return_type_t<T_prob_cl> bernoulli_logit_lpmf(const T_n_cl& n,
                     n_bounded_expr, theta_not_nan_expr);
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
-  operands_and_partials<T_prob_cl> ops_partials(theta);
+  operands_and_partials<decltype(theta_col)> ops_partials(theta_col);
 
   if (!is_constant_all<T_prob_cl>::value) {
     ops_partials.edge1_.partials_ = deriv_cl;
diff --git a/stan/math/opencl/prim/bernoulli_lpmf.hpp b/stan/math/opencl/prim/bernoulli_lpmf.hpp
index 013dd4c83e0..b6335842b60 100644
--- a/stan/math/opencl/prim/bernoulli_lpmf.hpp
+++ b/stan/math/opencl/prim/bernoulli_lpmf.hpp
@@ -46,10 +46,11 @@ return_type_t<T_prob_cl> bernoulli_lpmf(const T_n_cl& n,
     return 0.0;
   }
 
-  const auto& theta_val = value_of(theta);
+  const auto& theta_col = as_column_vector_or_scalar(theta);
+  const auto& theta_val = value_of(theta_col);
 
   T_partials_return logp(0.0);
-  operands_and_partials<T_prob_cl> ops_partials(theta);
+  operands_and_partials<decltype(theta_col)> ops_partials(theta_col);
 
   auto check_n_bounded = check_cl(function, "n", n, "in the interval [0, 1]");
   auto n_bounded_expr = 0 <= n && n <= 1;
diff --git a/stan/math/opencl/prim/beta_lpdf.hpp b/stan/math/opencl/prim/beta_lpdf.hpp
index 00203d04465..76aa74c3ffc 100644
--- a/stan/math/opencl/prim/beta_lpdf.hpp
+++ b/stan/math/opencl/prim/beta_lpdf.hpp
@@ -60,12 +60,17 @@ return_type_t<T_y_cl, T_scale_succ_cl, T_scale_fail_cl> beta_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& alpha_val = value_of(alpha);
-  const auto& beta_val = value_of(beta);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+  const auto& beta_col = as_column_vector_or_scalar(beta);
 
-  operands_and_partials<T_y_cl, T_scale_succ_cl, T_scale_fail_cl> ops_partials(
-      y, alpha, beta);
+  const auto& y_val = value_of(y_col);
+  const auto& alpha_val = value_of(alpha_col);
+  const auto& beta_val = value_of(beta_col);
+
+  operands_and_partials<decltype(y_col), decltype(alpha_col),
+                        decltype(beta_col)>
+      ops_partials(y_col, alpha_col, beta_col);
 
   auto check_alpha_pos_finite = check_cl(function, "First shape parameter",
                                          alpha_val, "positive finite");
diff --git a/stan/math/opencl/prim/beta_proportion_lpdf.hpp b/stan/math/opencl/prim/beta_proportion_lpdf.hpp
index 3a1db5c1295..26f34bb85bc 100644
--- a/stan/math/opencl/prim/beta_proportion_lpdf.hpp
+++ b/stan/math/opencl/prim/beta_proportion_lpdf.hpp
@@ -55,9 +55,13 @@ return_type_t<T_y_cl, T_loc_cl, T_prec_cl> beta_proportion_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& mu_val = value_of(mu);
-  const auto& kappa_val = value_of(kappa);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& kappa_col = as_column_vector_or_scalar(kappa);
+
+  const auto& y_val = value_of(y_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& kappa_val = value_of(kappa_col);
 
   auto check_y_bounded
       = check_cl(function, "Random variable", y_val, "in the interval [0, 1]");
@@ -106,7 +110,8 @@ return_type_t<T_y_cl, T_loc_cl, T_prec_cl> beta_proportion_lpdf(
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_y_cl, T_loc_cl, T_prec_cl> ops_partials(y, mu, kappa);
+  operands_and_partials<decltype(y_col), decltype(mu_col), decltype(kappa_col)>
+      ops_partials(y_col, mu_col, kappa_col);
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
   }
diff --git a/stan/math/opencl/prim/binomial_lpmf.hpp b/stan/math/opencl/prim/binomial_lpmf.hpp
index 840c90a33d3..6e4a0029f49 100644
--- a/stan/math/opencl/prim/binomial_lpmf.hpp
+++ b/stan/math/opencl/prim/binomial_lpmf.hpp
@@ -50,7 +50,8 @@ return_type_t<T_prob_cl> binomial_lpmf(const T_n_cl& n, const T_N_cl N,
     return 0.0;
   }
 
-  const auto& theta_val = value_of(theta);
+  const auto& theta_col = as_column_vector_or_scalar(theta);
+  const auto& theta_val = value_of(theta_col);
 
   auto check_n_bounded
       = check_cl(function, "Successes variable", n, "in the interval [0, N]");
@@ -106,7 +107,7 @@ return_type_t<T_prob_cl> binomial_lpmf(const T_n_cl& n, const T_N_cl N,
                     calc_if<need_deriv>(deriv_theta));
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
-  operands_and_partials<T_prob_cl> ops_partials(theta);
+  operands_and_partials<decltype(theta_col)> ops_partials(theta_col);
 
   if (!is_constant_all<T_prob_cl>::value) {
     if (need_sums) {
diff --git a/stan/math/opencl/prim/cauchy_lpdf.hpp b/stan/math/opencl/prim/cauchy_lpdf.hpp
index 88dfa3bb46e..32a7e54f79b 100644
--- a/stan/math/opencl/prim/cauchy_lpdf.hpp
+++ b/stan/math/opencl/prim/cauchy_lpdf.hpp
@@ -52,9 +52,13 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> cauchy_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& mu_val = value_of(mu);
-  const auto& sigma_val = value_of(sigma);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
+
+  const auto& y_val = value_of(y_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& sigma_val = value_of(sigma_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
@@ -100,8 +104,8 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> cauchy_lpdf(
   if (include_summand<propto>::value) {
     logp -= N * LOG_PI;
   }
-  operands_and_partials<T_y_cl, T_loc_cl, T_scale_cl> ops_partials(y, mu,
-                                                                   sigma);
+  operands_and_partials<decltype(y_col), decltype(mu_col), decltype(sigma_col)>
+      ops_partials(y_col, mu_col, sigma_col);
 
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
diff --git a/stan/math/opencl/prim/chi_square_lpdf.hpp b/stan/math/opencl/prim/chi_square_lpdf.hpp
index 3b258268c8b..38b4a562c71 100644
--- a/stan/math/opencl/prim/chi_square_lpdf.hpp
+++ b/stan/math/opencl/prim/chi_square_lpdf.hpp
@@ -51,8 +51,11 @@ return_type_t<T_y_cl, T_dof_cl> chi_square_lpdf(const T_y_cl& y,
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& nu_val = value_of(nu);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& nu_col = as_column_vector_or_scalar(nu);
+
+  const auto& y_val = value_of(y_col);
+  const auto& nu_val = value_of(nu_col);
 
   auto check_y_nonnegative
       = check_cl(function, "Random variable", y_val, "nonnegative");
@@ -87,7 +90,8 @@ return_type_t<T_y_cl, T_dof_cl> chi_square_lpdf(const T_y_cl& y,
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_y_cl, T_dof_cl> ops_partials(y, nu);
+  operands_and_partials<decltype(y_col), decltype(nu_col)> ops_partials(y_col,
+                                                                        nu_col);
 
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
diff --git a/stan/math/opencl/prim/double_exponential_lpdf.hpp b/stan/math/opencl/prim/double_exponential_lpdf.hpp
index 93c966ab333..1078d070719 100644
--- a/stan/math/opencl/prim/double_exponential_lpdf.hpp
+++ b/stan/math/opencl/prim/double_exponential_lpdf.hpp
@@ -49,9 +49,13 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> double_exponential_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& mu_val = value_of(mu);
-  const auto& sigma_val = value_of(sigma);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
+
+  const auto& y_val = value_of(y_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& sigma_val = value_of(sigma_col);
 
   auto check_y_finite = check_cl(function, "Random variable", y_val, "finite");
   auto y_finite_expr = isfinite(y_val);
@@ -87,8 +91,8 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> double_exponential_lpdf(
   if (include_summand<propto>::value) {
     logp -= N * LOG_TWO;
   }
-  operands_and_partials<T_y_cl, T_loc_cl, T_scale_cl> ops_partials(y, mu,
-                                                                   sigma);
+  operands_and_partials<decltype(y_col), decltype(mu_col), decltype(sigma_col)>
+      ops_partials(y_col, mu_col, sigma_col);
 
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
diff --git a/stan/math/opencl/prim/exp_mod_normal_lpdf.hpp b/stan/math/opencl/prim/exp_mod_normal_lpdf.hpp
index e8a65491e94..751bb54bac7 100644
--- a/stan/math/opencl/prim/exp_mod_normal_lpdf.hpp
+++ b/stan/math/opencl/prim/exp_mod_normal_lpdf.hpp
@@ -57,10 +57,15 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl> exp_mod_normal_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& mu_val = value_of(mu);
-  const auto& sigma_val = value_of(sigma);
-  const auto& lambda_val = value_of(lambda);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
+  const auto& lambda_col = as_column_vector_or_scalar(lambda);
+
+  const auto& y_val = value_of(y_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& sigma_val = value_of(sigma_col);
+  const auto& lambda_val = value_of(lambda_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not_nan");
@@ -126,8 +131,9 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl> exp_mod_normal_lpdf(
     logp -= LOG_TWO * N;
   }
 
-  operands_and_partials<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl>
-      ops_partials(y, mu, sigma, lambda);
+  operands_and_partials<decltype(y_col), decltype(mu_col), decltype(sigma_col),
+                        decltype(lambda_col)>
+      ops_partials(y_col, mu_col, sigma_col, lambda_col);
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
   }
diff --git a/stan/math/opencl/prim/exponential_lpdf.hpp b/stan/math/opencl/prim/exponential_lpdf.hpp
index b4479b8c890..685645bb9ad 100644
--- a/stan/math/opencl/prim/exponential_lpdf.hpp
+++ b/stan/math/opencl/prim/exponential_lpdf.hpp
@@ -60,10 +60,14 @@ return_type_t<T_y_cl, T_inv_scale_cl> exponential_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& beta_val = value_of(beta);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& beta_col = as_column_vector_or_scalar(beta);
 
-  operands_and_partials<T_y_cl, T_inv_scale_cl> ops_partials(y, beta);
+  const auto& y_val = value_of(y_col);
+  const auto& beta_val = value_of(beta_col);
+
+  operands_and_partials<decltype(y_col), decltype(beta_col)> ops_partials(
+      y_col, beta_col);
 
   auto check_y_nonnegative
       = check_cl(function, "Random variable", y_val, "nonnegative");
diff --git a/stan/math/opencl/prim/frechet_lpdf.hpp b/stan/math/opencl/prim/frechet_lpdf.hpp
index bcb23e2d4db..9a6c125b56a 100644
--- a/stan/math/opencl/prim/frechet_lpdf.hpp
+++ b/stan/math/opencl/prim/frechet_lpdf.hpp
@@ -52,12 +52,17 @@ return_type_t<T_y_cl, T_shape_cl, T_scale_cl> frechet_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& alpha_val = value_of(alpha);
-  const auto& sigma_val = value_of(sigma);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
 
-  operands_and_partials<T_y_cl, T_shape_cl, T_scale_cl> ops_partials(y, alpha,
-                                                                     sigma);
+  const auto& y_val = value_of(y_col);
+  const auto& alpha_val = value_of(alpha_col);
+  const auto& sigma_val = value_of(sigma_col);
+
+  operands_and_partials<decltype(y_col), decltype(alpha_col),
+                        decltype(sigma_col)>
+      ops_partials(y_col, alpha_col, sigma_col);
 
   auto check_y_positive
       = check_cl(function, "Random variable", y_val, "positive");
diff --git a/stan/math/opencl/prim/gamma_lpdf.hpp b/stan/math/opencl/prim/gamma_lpdf.hpp
index c0a2a681a6a..a6989b17bf0 100644
--- a/stan/math/opencl/prim/gamma_lpdf.hpp
+++ b/stan/math/opencl/prim/gamma_lpdf.hpp
@@ -62,9 +62,13 @@ return_type_t<T_y_cl, T_shape_cl, T_inv_scale_cl> gamma_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& alpha_val = value_of(alpha);
-  const auto& beta_val = value_of(beta);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+  const auto& beta_col = as_column_vector_or_scalar(beta);
+
+  const auto& y_val = value_of(y_col);
+  const auto& alpha_val = value_of(alpha_col);
+  const auto& beta_val = value_of(beta_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
@@ -116,8 +120,9 @@ return_type_t<T_y_cl, T_shape_cl, T_inv_scale_cl> gamma_lpdf(
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_y_cl, T_shape_cl, T_inv_scale_cl> ops_partials(
-      y, alpha, beta);
+  operands_and_partials<decltype(y_col), decltype(alpha_col),
+                        decltype(beta_col)>
+      ops_partials(y_col, alpha_col, beta_col);
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
   }
diff --git a/stan/math/opencl/prim/gumbel_lpdf.hpp b/stan/math/opencl/prim/gumbel_lpdf.hpp
index 271afe6e3fc..683e5e1c08e 100644
--- a/stan/math/opencl/prim/gumbel_lpdf.hpp
+++ b/stan/math/opencl/prim/gumbel_lpdf.hpp
@@ -50,9 +50,13 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> gumbel_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& mu_val = value_of(mu);
-  const auto& beta_val = value_of(beta);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& beta_col = as_column_vector_or_scalar(beta);
+
+  const auto& y_val = value_of(y_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& beta_val = value_of(beta_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
@@ -94,7 +98,8 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> gumbel_lpdf(
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_y_cl, T_loc_cl, T_scale_cl> ops_partials(y, mu, beta);
+  operands_and_partials<decltype(y_col), decltype(mu_col), decltype(beta_col)>
+      ops_partials(y_col, mu_col, beta_col);
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
   }
diff --git a/stan/math/opencl/prim/inv_chi_square_lpdf.hpp b/stan/math/opencl/prim/inv_chi_square_lpdf.hpp
index b9530cd1d4b..0b65025ca38 100644
--- a/stan/math/opencl/prim/inv_chi_square_lpdf.hpp
+++ b/stan/math/opencl/prim/inv_chi_square_lpdf.hpp
@@ -58,10 +58,14 @@ return_type_t<T_y_cl, T_dof_cl> inv_chi_square_lpdf(const T_y_cl& y,
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& nu_val = value_of(nu);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& nu_col = as_column_vector_or_scalar(nu);
 
-  operands_and_partials<T_y_cl, T_dof_cl> ops_partials(y, nu);
+  const auto& y_val = value_of(y_col);
+  const auto& nu_val = value_of(nu_col);
+
+  operands_and_partials<decltype(y_col), decltype(nu_col)> ops_partials(y_col,
+                                                                        nu_col);
 
   auto check_nu_pos_finite = check_cl(function, "Degrees of freedom parameter",
                                       nu_val, "positive finite");
diff --git a/stan/math/opencl/prim/inv_gamma_lpdf.hpp b/stan/math/opencl/prim/inv_gamma_lpdf.hpp
index 7b073003cd2..dd1adae6475 100644
--- a/stan/math/opencl/prim/inv_gamma_lpdf.hpp
+++ b/stan/math/opencl/prim/inv_gamma_lpdf.hpp
@@ -54,12 +54,17 @@ return_type_t<T_y_cl, T_shape_cl, T_scale_cl> inv_gamma_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& alpha_val = value_of(alpha);
-  const auto& beta_val = value_of(beta);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+  const auto& beta_col = as_column_vector_or_scalar(beta);
 
-  operands_and_partials<T_y_cl, T_shape_cl, T_scale_cl> ops_partials(y, alpha,
-                                                                     beta);
+  const auto& y_val = value_of(y_col);
+  const auto& alpha_val = value_of(alpha_col);
+  const auto& beta_val = value_of(beta_col);
+
+  operands_and_partials<decltype(y_col), decltype(alpha_col),
+                        decltype(beta_col)>
+      ops_partials(y_col, alpha_col, beta_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
diff --git a/stan/math/opencl/prim/logistic_lpdf.hpp b/stan/math/opencl/prim/logistic_lpdf.hpp
index 281cacb0ac8..385ff9a8b84 100644
--- a/stan/math/opencl/prim/logistic_lpdf.hpp
+++ b/stan/math/opencl/prim/logistic_lpdf.hpp
@@ -52,12 +52,16 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> logistic_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& mu_val = value_of(mu);
-  const auto& sigma_val = value_of(sigma);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
 
-  operands_and_partials<T_y_cl, T_loc_cl, T_scale_cl> ops_partials(y, mu,
-                                                                   sigma);
+  const auto& y_val = value_of(y_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& sigma_val = value_of(sigma_col);
+
+  operands_and_partials<decltype(y_col), decltype(mu_col), decltype(sigma_col)>
+      ops_partials(y_col, mu_col, sigma_col);
 
   auto check_y_finite = check_cl(function, "Random variable", y_val, "finite");
   auto y_finite = isfinite(y_val);
diff --git a/stan/math/opencl/prim/lognormal_lpdf.hpp b/stan/math/opencl/prim/lognormal_lpdf.hpp
index 8e4574f2141..edb4a0ec646 100644
--- a/stan/math/opencl/prim/lognormal_lpdf.hpp
+++ b/stan/math/opencl/prim/lognormal_lpdf.hpp
@@ -53,12 +53,16 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> lognormal_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& mu_val = value_of(mu);
-  const auto& sigma_val = value_of(sigma);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
 
-  operands_and_partials<T_y_cl, T_loc_cl, T_scale_cl> ops_partials(y, mu,
-                                                                   sigma);
+  const auto& y_val = value_of(y_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& sigma_val = value_of(sigma_col);
+
+  operands_and_partials<decltype(y_col), decltype(mu_col), decltype(sigma_col)>
+      ops_partials(y_col, mu_col, sigma_col);
 
   auto check_y_nonnegative
       = check_cl(function, "Random variable", y_val, "nonnegative");
diff --git a/stan/math/opencl/prim/neg_binomial_2_log_lpmf.hpp b/stan/math/opencl/prim/neg_binomial_2_log_lpmf.hpp
index cb600fb561c..91a8a80f612 100644
--- a/stan/math/opencl/prim/neg_binomial_2_log_lpmf.hpp
+++ b/stan/math/opencl/prim/neg_binomial_2_log_lpmf.hpp
@@ -60,8 +60,11 @@ neg_binomial_2_log_lpmf(const T_n_cl& n, const T_log_location_cl& eta,
     return 0.0;
   }
 
-  const auto& eta_val = value_of(eta);
-  const auto& phi_val = value_of(phi);
+  const auto& eta_col = as_column_vector_or_scalar(eta);
+  const auto& phi_col = as_column_vector_or_scalar(phi);
+
+  const auto& eta_val = value_of(eta_col);
+  const auto& phi_val = value_of(phi_col);
 
   auto check_n_nonnegative
       = check_cl(function, "Failures variable", n, "nonnegative");
@@ -105,8 +108,8 @@ neg_binomial_2_log_lpmf(const T_n_cl& n, const T_log_location_cl& eta,
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_log_location_cl, T_precision_cl> ops_partials(eta,
-                                                                        phi);
+  operands_and_partials<decltype(eta_col), decltype(phi_col)> ops_partials(
+      eta_col, phi_col);
 
   if (!is_constant<T_log_location_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(eta_deriv_cl);
diff --git a/stan/math/opencl/prim/neg_binomial_2_lpmf.hpp b/stan/math/opencl/prim/neg_binomial_2_lpmf.hpp
index d7bcae5cb8d..a64ad66115c 100644
--- a/stan/math/opencl/prim/neg_binomial_2_lpmf.hpp
+++ b/stan/math/opencl/prim/neg_binomial_2_lpmf.hpp
@@ -57,8 +57,11 @@ inline return_type_t<T_n_cl, T_location_cl, T_precision_cl> neg_binomial_2_lpmf(
     return 0.0;
   }
 
-  const auto& mu_val = value_of(mu);
-  const auto& phi_val = value_of(phi);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& phi_col = as_column_vector_or_scalar(phi);
+
+  const auto& mu_val = value_of(mu_col);
+  const auto& phi_val = value_of(phi_col);
 
   auto check_n_nonnegative
       = check_cl(function, "Failures variable", n, "nonnegative");
@@ -103,7 +106,8 @@ inline return_type_t<T_n_cl, T_location_cl, T_precision_cl> neg_binomial_2_lpmf(
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_location_cl, T_precision_cl> ops_partials(mu, phi);
+  operands_and_partials<decltype(mu_col), decltype(phi_col)> ops_partials(
+      mu_col, phi_col);
 
   if (!is_constant<T_location_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(mu_deriv_cl);
diff --git a/stan/math/opencl/prim/neg_binomial_lpmf.hpp b/stan/math/opencl/prim/neg_binomial_lpmf.hpp
index b4705ad3f8f..9f65096c765 100644
--- a/stan/math/opencl/prim/neg_binomial_lpmf.hpp
+++ b/stan/math/opencl/prim/neg_binomial_lpmf.hpp
@@ -55,8 +55,11 @@ inline return_type_t<T_n_cl, T_shape_cl, T_inv_scale_cl> neg_binomial_lpmf(
     return 0.0;
   }
 
-  const auto& alpha_val = value_of(alpha);
-  const auto& beta_val = value_of(beta);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+  const auto& beta_col = as_column_vector_or_scalar(beta);
+
+  const auto& alpha_val = value_of(alpha_col);
+  const auto& beta_val = value_of(beta_col);
 
   auto check_n_nonnegative
       = check_cl(function, "Failures variable", n, "nonnegative");
@@ -98,7 +101,8 @@ inline return_type_t<T_n_cl, T_shape_cl, T_inv_scale_cl> neg_binomial_lpmf(
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_shape_cl, T_inv_scale_cl> ops_partials(alpha, beta);
+  operands_and_partials<decltype(alpha_col), decltype(beta_col)> ops_partials(
+      alpha_col, beta_col);
 
   if (!is_constant<T_shape_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(alpha_deriv_cl);
diff --git a/stan/math/opencl/prim/normal_lpdf.hpp b/stan/math/opencl/prim/normal_lpdf.hpp
index 771d7fdd8ff..dfe2f6ebf0a 100644
--- a/stan/math/opencl/prim/normal_lpdf.hpp
+++ b/stan/math/opencl/prim/normal_lpdf.hpp
@@ -54,9 +54,13 @@ inline return_type_t<T_y_cl, T_loc_cl, T_scale_cl> normal_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& mu_val = value_of(mu);
-  const auto& sigma_val = value_of(sigma);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
+
+  const auto& y_val = value_of(y_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& sigma_val = value_of(sigma_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
@@ -98,8 +102,8 @@ inline return_type_t<T_y_cl, T_loc_cl, T_scale_cl> normal_lpdf(
     logp += NEG_LOG_SQRT_TWO_PI * N;
   }
 
-  operands_and_partials<T_y_cl, T_loc_cl, T_scale_cl> ops_partials(y, mu,
-                                                                   sigma);
+  operands_and_partials<decltype(y_col), decltype(mu_col), decltype(sigma_col)>
+      ops_partials(y_col, mu_col, sigma_col);
 
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
diff --git a/stan/math/opencl/prim/pareto_lpdf.hpp b/stan/math/opencl/prim/pareto_lpdf.hpp
index 8cd0e062b04..27a7addb8d5 100644
--- a/stan/math/opencl/prim/pareto_lpdf.hpp
+++ b/stan/math/opencl/prim/pareto_lpdf.hpp
@@ -52,9 +52,13 @@ return_type_t<T_y_cl, T_scale_cl, T_shape_cl> pareto_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& y_min_val = value_of(y_min);
-  const auto& alpha_val = value_of(alpha);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& y_min_col = as_column_vector_or_scalar(y_min);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+
+  const auto& y_val = value_of(y_col);
+  const auto& y_min_val = value_of(y_min_col);
+  const auto& alpha_val = value_of(alpha_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
@@ -104,8 +108,9 @@ return_type_t<T_y_cl, T_scale_cl, T_shape_cl> pareto_lpdf(
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_y_cl, T_scale_cl, T_shape_cl> ops_partials(y, y_min,
-                                                                     alpha);
+  operands_and_partials<decltype(y_col), decltype(y_min_col),
+                        decltype(alpha_col)>
+      ops_partials(y_col, y_min_col, alpha_col);
 
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
diff --git a/stan/math/opencl/prim/pareto_type_2_lpdf.hpp b/stan/math/opencl/prim/pareto_type_2_lpdf.hpp
index b07c017c664..5e567cfe2e0 100644
--- a/stan/math/opencl/prim/pareto_type_2_lpdf.hpp
+++ b/stan/math/opencl/prim/pareto_type_2_lpdf.hpp
@@ -57,10 +57,15 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_shape_cl> pareto_type_2_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& mu_val = value_of(mu);
-  const auto& lambda_val = value_of(lambda);
-  const auto& alpha_val = value_of(alpha);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& lambda_col = as_column_vector_or_scalar(lambda);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+
+  const auto& y_val = value_of(y_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& lambda_val = value_of(lambda_col);
+  const auto& alpha_val = value_of(alpha_col);
 
   auto y_minus_mu = y_val - mu_val;
   auto check_y_ge_mu
@@ -112,8 +117,9 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_shape_cl> pareto_type_2_lpdf(
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_y_cl, T_loc_cl, T_scale_cl, T_shape_cl> ops_partials(
-      y, mu, lambda, alpha);
+  operands_and_partials<decltype(y_col), decltype(mu_col), decltype(lambda_col),
+                        decltype(alpha_col)>
+      ops_partials(y_col, mu_col, lambda_col, alpha_col);
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
   }
diff --git a/stan/math/opencl/prim/poisson_log_lpmf.hpp b/stan/math/opencl/prim/poisson_log_lpmf.hpp
index 7a07bd95c01..515a311a690 100644
--- a/stan/math/opencl/prim/poisson_log_lpmf.hpp
+++ b/stan/math/opencl/prim/poisson_log_lpmf.hpp
@@ -47,10 +47,11 @@ return_type_t<T_log_rate_cl> poisson_log_lpmf(const T_n_cl& n,
     return 0.0;
   }
 
-  const auto& alpha_val = value_of(alpha);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+  const auto& alpha_val = value_of(alpha_col);
 
   T_partials_return logp(0.0);
-  operands_and_partials<T_log_rate_cl> ops_partials(alpha);
+  operands_and_partials<decltype(alpha_col)> ops_partials(alpha_col);
 
   auto check_n_nonnegative
       = check_cl(function, "Random variable", n, "nonnegative");
diff --git a/stan/math/opencl/prim/poisson_lpmf.hpp b/stan/math/opencl/prim/poisson_lpmf.hpp
index b9021cf8276..896ef4d2209 100644
--- a/stan/math/opencl/prim/poisson_lpmf.hpp
+++ b/stan/math/opencl/prim/poisson_lpmf.hpp
@@ -46,10 +46,11 @@ return_type_t<T_rate_cl> poisson_lpmf(const T_n_cl& n,
     return 0.0;
   }
 
-  const auto& lambda_val = value_of(lambda);
+  const auto& lambda_col = as_column_vector_or_scalar(lambda);
+  const auto& lambda_val = value_of(lambda_col);
 
   T_partials_return logp(0.0);
-  operands_and_partials<T_rate_cl> ops_partials(lambda);
+  operands_and_partials<decltype(lambda_col)> ops_partials(lambda_col);
 
   auto check_n_nonnegative
       = check_cl(function, "Random variable", n, "nonnegative");
diff --git a/stan/math/opencl/prim/rayleigh_lpdf.hpp b/stan/math/opencl/prim/rayleigh_lpdf.hpp
index 8045e55d610..5aa058b5d5c 100644
--- a/stan/math/opencl/prim/rayleigh_lpdf.hpp
+++ b/stan/math/opencl/prim/rayleigh_lpdf.hpp
@@ -43,10 +43,14 @@ return_type_t<T_y_cl, T_scale_cl> rayleigh_lpdf(const T_y_cl& y,
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& sigma_val = value_of(sigma);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
 
-  operands_and_partials<T_y_cl, T_scale_cl> ops_partials(y, sigma);
+  const auto& y_val = value_of(y_col);
+  const auto& sigma_val = value_of(sigma_col);
+
+  operands_and_partials<decltype(y_col), decltype(sigma_col)> ops_partials(
+      y_col, sigma_col);
 
   auto check_y_positive
       = check_cl(function, "Random variable", y_val, "positive");
diff --git a/stan/math/opencl/prim/scaled_inv_chi_square_lpdf.hpp b/stan/math/opencl/prim/scaled_inv_chi_square_lpdf.hpp
index 2ed91fa1f21..502f54157b6 100644
--- a/stan/math/opencl/prim/scaled_inv_chi_square_lpdf.hpp
+++ b/stan/math/opencl/prim/scaled_inv_chi_square_lpdf.hpp
@@ -58,9 +58,13 @@ inline return_type_t<T_y_cl, T_dof_cl, T_scale_cl> scaled_inv_chi_square_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& nu_val = value_of(nu);
-  const auto& s_val = value_of(s);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& nu_col = as_column_vector_or_scalar(nu);
+  const auto& s_col = as_column_vector_or_scalar(s);
+
+  const auto& y_val = value_of(y_col);
+  const auto& nu_val = value_of(nu_col);
+  const auto& s_val = value_of(s_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
@@ -122,7 +126,8 @@ inline return_type_t<T_y_cl, T_dof_cl, T_scale_cl> scaled_inv_chi_square_lpdf(
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_y_cl, T_dof_cl, T_scale_cl> ops_partials(y, nu, s);
+  operands_and_partials<decltype(y_col), decltype(nu_col), decltype(s_col)>
+      ops_partials(y_col, nu_col, s_col);
 
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
diff --git a/stan/math/opencl/prim/skew_normal_lpdf.hpp b/stan/math/opencl/prim/skew_normal_lpdf.hpp
index 477fab854bf..2d781ee151d 100644
--- a/stan/math/opencl/prim/skew_normal_lpdf.hpp
+++ b/stan/math/opencl/prim/skew_normal_lpdf.hpp
@@ -59,10 +59,15 @@ inline return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_shape_cl> skew_normal_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& mu_val = value_of(mu);
-  const auto& sigma_val = value_of(sigma);
-  const auto& alpha_val = value_of(alpha);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+
+  const auto& y_val = value_of(y_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& sigma_val = value_of(sigma_col);
+  const auto& alpha_val = value_of(alpha_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
@@ -128,8 +133,9 @@ inline return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_shape_cl> skew_normal_lpdf(
     logp -= HALF_LOG_TWO_PI * N;
   }
 
-  operands_and_partials<T_y_cl, T_loc_cl, T_scale_cl, T_shape_cl> ops_partials(
-      y, mu, sigma, alpha);
+  operands_and_partials<decltype(y_col), decltype(mu_col), decltype(sigma_col),
+                        decltype(alpha_col)>
+      ops_partials(y_col, mu_col, sigma_col, alpha_col);
 
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
diff --git a/stan/math/opencl/prim/std_normal_lpdf.hpp b/stan/math/opencl/prim/std_normal_lpdf.hpp
index a02f86fcd85..b9782db683e 100644
--- a/stan/math/opencl/prim/std_normal_lpdf.hpp
+++ b/stan/math/opencl/prim/std_normal_lpdf.hpp
@@ -43,7 +43,8 @@ inline return_type_t<T_y_cl> std_normal_lpdf(const T_y_cl& y) {
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& y_val = value_of(y_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
@@ -65,7 +66,7 @@ inline return_type_t<T_y_cl> std_normal_lpdf(const T_y_cl& y) {
     logp += NEG_LOG_SQRT_TWO_PI * N;
   }
 
-  operands_and_partials<T_y_cl> ops_partials(y);
+  operands_and_partials<decltype(y_col)> ops_partials(y_col);
 
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
diff --git a/stan/math/opencl/prim/student_t_lpdf.hpp b/stan/math/opencl/prim/student_t_lpdf.hpp
index d009095b4e2..f6877abf320 100644
--- a/stan/math/opencl/prim/student_t_lpdf.hpp
+++ b/stan/math/opencl/prim/student_t_lpdf.hpp
@@ -68,10 +68,15 @@ inline return_type_t<T_y_cl, T_dof_cl, T_loc_cl, T_scale_cl> student_t_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& nu_val = value_of(nu);
-  const auto& mu_val = value_of(mu);
-  const auto& sigma_val = value_of(sigma);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& nu_col = as_column_vector_or_scalar(nu);
+  const auto& mu_col = as_column_vector_or_scalar(mu);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
+
+  const auto& y_val = value_of(y_col);
+  const auto& nu_val = value_of(nu_col);
+  const auto& mu_val = value_of(mu_col);
+  const auto& sigma_val = value_of(sigma_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
@@ -135,8 +140,9 @@ inline return_type_t<T_y_cl, T_dof_cl, T_loc_cl, T_scale_cl> student_t_lpdf(
     logp -= LOG_SQRT_PI * N;
   }
 
-  operands_and_partials<T_y_cl, T_dof_cl, T_loc_cl, T_scale_cl> ops_partials(
-      y, nu, mu, sigma);
+  operands_and_partials<decltype(y_col), decltype(nu_col), decltype(mu_col),
+                        decltype(sigma_col)>
+      ops_partials(y_col, nu_col, mu_col, sigma_col);
 
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
diff --git a/stan/math/opencl/prim/uniform_lpdf.hpp b/stan/math/opencl/prim/uniform_lpdf.hpp
index 96957a8af4b..2454e482ff2 100644
--- a/stan/math/opencl/prim/uniform_lpdf.hpp
+++ b/stan/math/opencl/prim/uniform_lpdf.hpp
@@ -57,9 +57,13 @@ inline return_type_t<T_y_cl, T_low_cl, T_high_cl> uniform_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& alpha_val = value_of(alpha);
-  const auto& beta_val = value_of(beta);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+  const auto& beta_col = as_column_vector_or_scalar(beta);
+
+  const auto& y_val = value_of(y_col);
+  const auto& alpha_val = value_of(alpha_col);
+  const auto& beta_val = value_of(beta_col);
 
   auto check_y_not_nan
       = check_cl(function, "Random variable", y_val, "not NaN");
@@ -106,8 +110,9 @@ inline return_type_t<T_y_cl, T_low_cl, T_high_cl> uniform_lpdf(
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_y_cl, T_low_cl, T_high_cl> ops_partials(y, alpha,
-                                                                  beta);
+  operands_and_partials<decltype(y_col), decltype(alpha_col),
+                        decltype(beta_col)>
+      ops_partials(y_col, alpha_col, beta_col);
 
   if (!is_constant<T_low_cl>::value) {
     ops_partials.edge2_.partials_ = std::move(alpha_deriv_cl);
diff --git a/stan/math/opencl/prim/weibull_lpdf.hpp b/stan/math/opencl/prim/weibull_lpdf.hpp
index dd03117d80b..00972c90fb9 100644
--- a/stan/math/opencl/prim/weibull_lpdf.hpp
+++ b/stan/math/opencl/prim/weibull_lpdf.hpp
@@ -49,9 +49,13 @@ inline return_type_t<T_y_cl, T_shape_cl, T_scale_cl> weibull_lpdf(
     return 0.0;
   }
 
-  const auto& y_val = value_of(y);
-  const auto& alpha_val = value_of(alpha);
-  const auto& sigma_val = value_of(sigma);
+  const auto& y_col = as_column_vector_or_scalar(y);
+  const auto& alpha_col = as_column_vector_or_scalar(alpha);
+  const auto& sigma_col = as_column_vector_or_scalar(sigma);
+
+  const auto& y_val = value_of(y_col);
+  const auto& alpha_val = value_of(alpha_col);
+  const auto& sigma_val = value_of(sigma_col);
 
   auto check_y_finite = check_cl(function, "Random variable", y_val, "finite");
   auto y_finite = isfinite(y_val);
@@ -108,8 +112,9 @@ inline return_type_t<T_y_cl, T_shape_cl, T_scale_cl> weibull_lpdf(
 
   T_partials_return logp = sum(from_matrix_cl(logp_cl));
 
-  operands_and_partials<T_y_cl, T_shape_cl, T_scale_cl> ops_partials(y, alpha,
-                                                                     sigma);
+  operands_and_partials<decltype(y_col), decltype(alpha_col),
+                        decltype(sigma_col)>
+      ops_partials(y_col, alpha_col, sigma_col);
 
   if (!is_constant<T_y_cl>::value) {
     ops_partials.edge1_.partials_ = std::move(y_deriv_cl);
diff --git a/stan/math/opencl/rev.hpp b/stan/math/opencl/rev.hpp
index f857827d3a0..020ea103250 100644
--- a/stan/math/opencl/rev.hpp
+++ b/stan/math/opencl/rev.hpp
@@ -3,6 +3,7 @@
 #ifdef STAN_OPENCL
 
 #include <stan/math/opencl/prim.hpp>
+#include <stan/math/opencl/rev/as_column_vector_or_scalar.hpp>
 #include <stan/math/opencl/rev/acos.hpp>
 #include <stan/math/opencl/rev/acosh.hpp>
 #include <stan/math/opencl/rev/add.hpp>
diff --git a/stan/math/opencl/rev/as_column_vector_or_scalar.hpp b/stan/math/opencl/rev/as_column_vector_or_scalar.hpp
new file mode 100644
index 00000000000..ef471770ed2
--- /dev/null
+++ b/stan/math/opencl/rev/as_column_vector_or_scalar.hpp
@@ -0,0 +1,30 @@
+#ifndef STAN_MATH_OPENCL_REV_AS_COLUMN_VECTOR_OR_SCALAR_HPP
+#define STAN_MATH_OPENCL_REV_AS_COLUMN_VECTOR_OR_SCALAR_HPP
+#ifdef STAN_OPENCL
+
+#include <stan/math/opencl/kernel_generator.hpp>
+#include <stan/math/prim/err.hpp>
+#include <stan/math/rev/core.hpp>
+
+namespace stan {
+namespace math {
+
+/**
+ * Converts a `var` kernel generator row or column vector to a column vector.
+ *
+ * @tparam T non-scalar kernel expression satisfying `require_any_var_t`.
+ * @param m input; the call forwards to `m.as_column_vector_or_scalar()`.
+ * @return whatever `m.as_column_vector_or_scalar()` returns.
+ */
+template <typename T,
+          require_all_nonscalar_prim_or_rev_kernel_expression_t<T>* = nullptr,
+          require_any_var_t<T>* = nullptr>
+inline auto as_column_vector_or_scalar(const T& m) {
+  return m.as_column_vector_or_scalar();
+}
+
+}  // namespace math
+}  // namespace stan
+
+#endif
+#endif
diff --git a/stan/math/opencl/rev/operands_and_partials.hpp b/stan/math/opencl/rev/operands_and_partials.hpp
index e2ca34f6d25..3f6deded19e 100644
--- a/stan/math/opencl/rev/operands_and_partials.hpp
+++ b/stan/math/opencl/rev/operands_and_partials.hpp
@@ -14,7 +14,8 @@ namespace math {
 namespace internal {
 
 template <typename Op>
-class ops_partials_edge<double, var_value<Op>, require_matrix_cl_t<Op>> {
+class ops_partials_edge<double, var_value<Op>,
+                        require_kernel_expression_lhs_t<Op>> {
  public:
   using partials_t = plain_type_t<Op>;
   partials_t partials_;                       // For univariate use-cases
diff --git a/stan/math/opencl/rev/vari.hpp b/stan/math/opencl/rev/vari.hpp
index bc0de7edbe0..78c20a5fcfe 100644
--- a/stan/math/opencl/rev/vari.hpp
+++ b/stan/math/opencl/rev/vari.hpp
@@ -85,6 +85,17 @@ class vari_cl_base : public vari_base {
                                                     std::move(adj_t));
   }
 
+  /**
+   * Returns a `vari_view` of `val_` and `adj_` converted to column vectors.
+   * @return view constructed from the converted value and adjoint
+   */
+  auto as_column_vector_or_scalar() {
+    auto&& val_t = stan::math::as_column_vector_or_scalar(val_);
+    auto&& adj_t = stan::math::as_column_vector_or_scalar(adj_);
+    return vari_view<std::decay_t<decltype(val_t)>>(std::move(val_t),
+                                                    std::move(adj_t));
+  }
+
   /**
    * Return the number of rows for this class's `val_` member
    */
diff --git a/stan/math/prim/err/check_vector.hpp b/stan/math/prim/err/check_vector.hpp
index ebdaf588b6f..5cee97faeb5 100644
--- a/stan/math/prim/err/check_vector.hpp
+++ b/stan/math/prim/err/check_vector.hpp
@@ -8,6 +8,10 @@
 #include <string>
 #include <typeinfo>
 
+#ifdef STAN_OPENCL
+#include <stan/math/opencl/value_type.hpp>
+#endif
+
 namespace stan {
 namespace math {
 
diff --git a/stan/math/prim/fun/cholesky_decompose.hpp b/stan/math/prim/fun/cholesky_decompose.hpp
index 1d06463c3ef..bf2883b5a10 100644
--- a/stan/math/prim/fun/cholesky_decompose.hpp
+++ b/stan/math/prim/fun/cholesky_decompose.hpp
@@ -4,9 +4,6 @@
 #include <stan/math/prim/meta.hpp>
 #include <stan/math/prim/err.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
-#ifdef STAN_OPENCL
-#include <stan/math/opencl/prim.hpp>
-#endif
 
 #include <cmath>
 
@@ -30,7 +27,6 @@ namespace math {
  *   if m is not positive definite (if m has more than 0 elements)
  */
 template <typename EigMat, require_eigen_t<EigMat>* = nullptr,
-          require_not_vt_same<double, EigMat>* = nullptr,
           require_not_eigen_vt<is_var, EigMat>* = nullptr>
 inline Eigen::Matrix<value_type_t<EigMat>, EigMat::RowsAtCompileTime,
                      EigMat::ColsAtCompileTime>
@@ -45,51 +41,6 @@ cholesky_decompose(const EigMat& m) {
   return llt.matrixL();
 }
 
-/**
- * Return the lower-triangular Cholesky factor (i.e., matrix
- * square root) of the specified square, symmetric matrix.  The return
- * value \f$L\f$ will be a lower-triangular matrix such that the
- * original matrix \f$A\f$ is given by
- * <p>\f$A = L \times L^EigMat\f$.
- *
- * @tparam EigMat type of the matrix (must be derived from \c Eigen::MatrixBase)
- * @param m Symmetric matrix.
- * @return Square root of matrix.
- * @note Because OpenCL only works on doubles there are two
- * <code>cholesky_decompose</code> functions. One that works on doubles
- * (this one) and another that works on all other types.
- * @throw std::domain_error if m is not a symmetric matrix or
- *   if m is not positive definite (if m has more than 0 elements)
- */
-template <typename EigMat, require_eigen_t<EigMat>* = nullptr,
-          require_vt_same<double, EigMat>* = nullptr>
-inline Eigen::Matrix<double, EigMat::RowsAtCompileTime,
-                     EigMat::ColsAtCompileTime>
-cholesky_decompose(const EigMat& m) {
-  const eval_return_type_t<EigMat>& m_eval = m.eval();
-  check_not_nan("cholesky_decompose", "m", m_eval);
-#ifdef STAN_OPENCL
-  if (m.rows() >= opencl_context.tuning_opts().cholesky_size_worth_transfer) {
-    matrix_cl<double> m_cl(m_eval);
-    return from_matrix_cl(cholesky_decompose(m_cl));
-  } else {
-    check_symmetric("cholesky_decompose", "m", m_eval);
-    Eigen::LLT<Eigen::Matrix<double, EigMat::RowsAtCompileTime,
-                             EigMat::ColsAtCompileTime>>
-        llt = m_eval.llt();
-    check_pos_definite("cholesky_decompose", "m", llt);
-    return llt.matrixL();
-  }
-#else
-  check_symmetric("cholesky_decompose", "m", m_eval);
-  Eigen::LLT<Eigen::Matrix<double, EigMat::RowsAtCompileTime,
-                           EigMat::ColsAtCompileTime>>
-      llt = m_eval.llt();
-  check_pos_definite("cholesky_decompose", "m", llt);
-  return llt.matrixL();
-#endif
-}
-
 }  // namespace math
 }  // namespace stan
 
diff --git a/stan/math/prim/fun/gp_exp_quad_cov.hpp b/stan/math/prim/fun/gp_exp_quad_cov.hpp
index 932a14c87d6..57cdd623cc9 100644
--- a/stan/math/prim/fun/gp_exp_quad_cov.hpp
+++ b/stan/math/prim/fun/gp_exp_quad_cov.hpp
@@ -8,10 +8,6 @@
 #include <stan/math/prim/fun/squared_distance.hpp>
 #include <stan/math/prim/fun/exp.hpp>
 #include <stan/math/prim/fun/square.hpp>
-#ifdef STAN_OPENCL
-#include <stan/math/opencl/prim.hpp>
-#endif
-
 #include <cmath>
 #include <type_traits>
 #include <vector>
@@ -338,308 +334,6 @@ gp_exp_quad_cov(const std::vector<Eigen::Matrix<T_x1, -1, 1>> &x1,
   return cov;
 }
 
-#ifdef STAN_OPENCL
-/**
- * Returns a squared exponential kernel.
- *
- * @param x std::vector of scalars that can be used in square distance.
- * @param sigma marginal standard deviation or magnitude
- * @param length_scale length scale
- * @return squared distance
- * @throw std::domain_error if sigma <= 0, l <= 0, or
- *   x is nan or infinite
- */
-template <>
-inline Eigen::MatrixXd gp_exp_quad_cov(const std::vector<double> &x,
-                                       const double &sigma,
-                                       const double &length_scale) {
-  const char *function_name = "gp_exp_quad_cov";
-  check_positive(function_name, "magnitude", sigma);
-  check_positive(function_name, "length scale", length_scale);
-
-  const auto x_size = x.size();
-  Eigen::MatrixXd cov(x_size, x_size);
-  if (x_size == 0) {
-    return cov;
-  }
-  const auto total_size = x_size + cov.size();
-  if (total_size < opencl_context.tuning_opts().gp_exp_quad_cov_simple) {
-    for (size_t n = 0; n < x_size; ++n) {
-      check_not_nan("gp_exp_quad_cov", "x", x[n]);
-    }
-
-    cov = internal::gp_exp_quad_cov(x, square(sigma),
-                                    -0.5 / square(length_scale));
-    return cov;
-  }
-
-  matrix_cl<double> x_cl(x, 1, x.size());
-  check_nan(function_name, "x", x_cl);
-  matrix_cl<double> cov_cl = gp_exp_quad_cov(x_cl, sigma, length_scale);
-  cov = from_matrix_cl(cov_cl);
-
-  return cov;
-}
-
-/**
- * Returns a squared exponential kernel.
- *
- * @param x std::vector of scalars that can be used in square distance.
- *    This function assumes each element of x is the same size.
- * @param sigma marginal standard deviation or magnitude
- * @param length_scale length scale
- * @return squared distance
- * @throw std::domain_error if sigma <= 0, l <= 0, or
- *   x is nan or infinite
- */
-template <>
-inline Eigen::MatrixXd gp_exp_quad_cov(const std::vector<Eigen::VectorXd> &x,
-                                       const double &sigma,
-                                       const double &length_scale) {
-  const char *function_name = "gp_exp_quad_cov";
-  check_positive(function_name, "magnitude", sigma);
-  check_positive(function_name, "length scale", length_scale);
-
-  const size_t x_size = x.size();
-  Eigen::MatrixXd cov(x_size, x_size);
-
-  if (x_size == 0) {
-    return cov;
-  }
-
-  const size_t inner_x1_size = x[0].size();
-  const auto total_size = x_size * inner_x1_size + cov.size();
-  if (total_size < opencl_context.tuning_opts().gp_exp_quad_cov_complex) {
-    for (size_t i = 0; i < x_size; ++i) {
-      check_not_nan("gp_exp_quad_cov", "x", x[i]);
-    }
-    cov = internal::gp_exp_quad_cov(x, square(sigma),
-                                    -0.5 / square(length_scale));
-    return cov;
-  }
-
-  matrix_cl<double> x_cl(x);
-  check_nan(function_name, "x", x_cl);
-  matrix_cl<double> cov_cl = gp_exp_quad_cov(x_cl, sigma, length_scale);
-  cov = from_matrix_cl(cov_cl);
-
-  return cov;
-}
-
-/**
- * Returns a squared exponential kernel.
- *
- * @param x std::vector of Eigen vectors of scalars.
- * @param sigma marginal standard deviation or magnitude
- * @param length_scale std::vector length scale
- * @return squared distance
- * @throw std::domain_error if sigma <= 0, l <= 0, or
- *   x is nan or infinite
- */
-template <>
-inline Eigen::MatrixXd gp_exp_quad_cov(
-    const std::vector<Eigen::VectorXd> &x, const double &sigma,
-    const std::vector<double> &length_scale) {
-  const char *function_name = "gp_exp_quad_cov";
-  check_positive_finite(function_name, "magnitude", sigma);
-  check_positive_finite(function_name, "length scale", length_scale);
-
-  const size_t x_size = x.size();
-  Eigen::MatrixXd cov(x_size, x_size);
-
-  if (x_size == 0) {
-    return cov;
-  }
-
-  const size_t inner_x1_size = x[0].size();
-  check_size_match(function_name, "x dimension", inner_x1_size,
-                   "number of length scales", length_scale.size());
-  const auto total_size = x_size * inner_x1_size + inner_x1_size + cov.size();
-  if (total_size < opencl_context.tuning_opts().gp_exp_quad_cov_complex) {
-    return internal::gp_exp_quad_cov(divide_columns(x, length_scale),
-                                     square(sigma));
-  }
-
-  matrix_cl<double> x_cl(x);
-  check_nan(function_name, "x", x_cl);
-  matrix_cl<double> length_scale_cl(length_scale, length_scale.size(), 1);
-  divide_columns(x_cl, length_scale_cl);
-  matrix_cl<double> cov_cl = gp_exp_quad_cov(x_cl, sigma, 1);
-  cov = from_matrix_cl(cov_cl);
-  return cov;
-}
-
-/**
- * Returns a squared exponential kernel.
- *
- * This function is for the cross covariance matrix
- * needed to compute posterior predictive density.
- *
- * @param x1 std::vector of elements that can be used in square distance
- * @param x2 std::vector of elements that can be used in square distance
- * @param sigma standard deviation
- * @param length_scale length scale
- * @return squared distance
- * @throw std::domain_error if sigma <= 0, l <= 0, or
- *   x is nan or infinite
- */
-template <>
-inline typename Eigen::MatrixXd gp_exp_quad_cov(const std::vector<double> &x1,
-                                                const std::vector<double> &x2,
-                                                const double &sigma,
-                                                const double &length_scale) {
-  const char *function_name = "gp_exp_quad_cov";
-  check_positive_finite(function_name, "magnitude", sigma);
-  check_positive_finite(function_name, "length scale", length_scale);
-
-  Eigen::MatrixXd cov(x1.size(), x2.size());
-  if (x1.size() == 0 || x1.size() == 0) {
-    return cov;
-  }
-  const auto total_size = x1.size() + x2.size() + cov.size();
-  if (total_size < opencl_context.tuning_opts().gp_exp_quad_cov_simple) {
-    for (size_t i = 0; i < x1.size(); ++i) {
-      check_not_nan(function_name, "x1", x1[i]);
-    }
-    for (size_t i = 0; i < x2.size(); ++i) {
-      check_not_nan(function_name, "x2", x2[i]);
-    }
-
-    cov = internal::gp_exp_quad_cov(x1, x2, square(sigma),
-                                    -0.5 / square(length_scale));
-    return cov;
-  }
-
-  matrix_cl<double> x1_cl(x1, 1, x1.size());
-  check_nan(function_name, "x1", x1_cl);
-  matrix_cl<double> x2_cl(x2, 1, x2.size());
-  check_nan(function_name, "x2", x2_cl);
-  matrix_cl<double> cov_cl = gp_exp_quad_cov(x1_cl, x2_cl, sigma, length_scale);
-  cov = from_matrix_cl(cov_cl);
-  return cov;
-}
-
-/**
- * Returns a squared exponential kernel.
- *
- * This function is for the cross covariance matrix
- * needed to compute posterior predictive density.
- *
- * @param x1 std::vector of elements that can be used in square distance
- * @param x2 std::vector of elements that can be used in square distance
- *    This function assumes each element of x1 and x2 are the same size.
- * @param sigma standard deviation
- * @param length_scale length scale
- * @return squared distance
- * @throw std::domain_error if sigma <= 0, l <= 0, or
- *   x is nan or infinite
- */
-template <>
-inline typename Eigen::MatrixXd gp_exp_quad_cov(
-    const std::vector<Eigen::VectorXd> &x1,
-    const std::vector<Eigen::VectorXd> &x2, const double &sigma,
-    const double &length_scale) {
-  const char *function_name = "gp_exp_quad_cov";
-  const int x1_size = x1.size();
-  const int x2_size = x2.size();
-  check_positive_finite(function_name, "magnitude", sigma);
-  check_positive_finite(function_name, "length scale", length_scale);
-
-  Eigen::MatrixXd cov(x1.size(), x2.size());
-  if (x1.size() == 0 || x1.size() == 0) {
-    return cov;
-  }
-
-  const int x1_inner_size = x1[0].size();
-  const int x2_inner_size = x1[0].size();
-  const auto total_size
-      = x1_size * x1_inner_size + x2_size * x2_inner_size + cov.size();
-  if (total_size < opencl_context.tuning_opts().gp_exp_quad_cov_complex) {
-    for (size_t i = 0; i < x1.size(); ++i) {
-      check_not_nan(function_name, "x1", x1[i]);
-    }
-    for (size_t i = 0; i < x2.size(); ++i) {
-      check_not_nan(function_name, "x2", x2[i]);
-    }
-
-    cov = internal::gp_exp_quad_cov(x1, x2, square(sigma),
-                                    -0.5 / square(length_scale));
-    return cov;
-  }
-
-  matrix_cl<double> x1_cl(x1);
-  check_nan(function_name, "x1", x1_cl);
-  matrix_cl<double> x2_cl(x2);
-  check_nan(function_name, "x2", x2_cl);
-  matrix_cl<double> cov_cl = gp_exp_quad_cov(x1_cl, x2_cl, sigma, length_scale);
-  cov = from_matrix_cl(cov_cl);
-  return cov;
-}
-
-/**
- * Returns a squared exponential kernel.
- *
- * This function is for the cross covariance
- * matrix needed to compute the posterior predictive density.
- *
- * @param x1 std::vector of Eigen vectors of scalars.
- * @param x2 std::vector of Eigen vectors of scalars.
- * @param sigma standard deviation
- * @param length_scale std::vector of length scale
- * @return squared distance
- * @throw std::domain_error if sigma <= 0, l <= 0, or
- *   x is nan or infinite
- */
-template <>
-inline typename Eigen::MatrixXd gp_exp_quad_cov(
-    const std::vector<Eigen::VectorXd> &x1,
-    const std::vector<Eigen::VectorXd> &x2, const double &sigma,
-    const std::vector<double> &length_scale) {
-  size_t x1_size = x1.size();
-  size_t x2_size = x2.size();
-  size_t l_size = length_scale.size();
-
-  Eigen::MatrixXd cov(x1_size, x2_size);
-  if (x1_size == 0 || x2_size == 0) {
-    return cov;
-  }
-
-  const int x1_inner_size = x1[0].size();
-  const int x2_inner_size = x1[0].size();
-  const char *function_name = "gp_exp_quad_cov";
-  check_positive_finite(function_name, "magnitude", sigma);
-  check_positive_finite(function_name, "length scale", length_scale);
-  check_size_match(function_name, "x dimension", x1[0].size(),
-                   "number of length scales", l_size);
-  check_size_match(function_name, "x dimension", x2[0].size(),
-                   "number of length scales", l_size);
-  const auto total_size
-      = x1_size * x1_inner_size + x2_size * x2_inner_size + l_size + cov.size();
-  if (total_size < opencl_context.tuning_opts().gp_exp_quad_cov_complex) {
-    for (size_t i = 0; i < x1_size; ++i) {
-      check_not_nan(function_name, "x1", x1[i]);
-    }
-    for (size_t i = 0; i < x2_size; ++i) {
-      check_not_nan(function_name, "x1", x2[i]);
-    }
-    cov = internal::gp_exp_quad_cov(divide_columns(x1, length_scale),
-                                    divide_columns(x2, length_scale),
-                                    square(sigma));
-    return cov;
-  }
-  matrix_cl<double> x1_cl(x1);
-  check_nan(function_name, "x1", x1_cl);
-  matrix_cl<double> length_scale_cl(length_scale, length_scale.size(), 1);
-  divide_columns(x1_cl, length_scale_cl);
-  matrix_cl<double> x2_cl(x2);
-  check_nan(function_name, "x2", x2_cl);
-  divide_columns(x2_cl, length_scale_cl);
-  matrix_cl<double> cov_cl = gp_exp_quad_cov(x1_cl, x2_cl, sigma, 1);
-  cov = from_matrix_cl(cov_cl);
-  return cov;
-}
-#endif
-
 }  // namespace math
 }  // namespace stan
 #endif
diff --git a/stan/math/prim/fun/mdivide_left_tri.hpp b/stan/math/prim/fun/mdivide_left_tri.hpp
index 15309e513d0..b6560f36f72 100644
--- a/stan/math/prim/fun/mdivide_left_tri.hpp
+++ b/stan/math/prim/fun/mdivide_left_tri.hpp
@@ -5,9 +5,6 @@
 #include <stan/math/prim/err.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
 #include <stan/math/prim/fun/to_ref.hpp>
-#ifdef STAN_OPENCL
-#include <stan/math/opencl/prim.hpp>
-#endif
 
 namespace stan {
 namespace math {
@@ -28,7 +25,6 @@ namespace math {
  */
 template <Eigen::UpLoType TriView, typename T1, typename T2,
           require_all_eigen_t<T1, T2> * = nullptr,
-          require_any_not_vt_same<double, T1, T2> * = nullptr,
           require_all_not_eigen_vt<is_var, T1, T2> * = nullptr>
 inline Eigen::Matrix<return_type_t<T1, T2>, T1::RowsAtCompileTime,
                      T2::ColsAtCompileTime>
@@ -55,8 +51,7 @@ mdivide_left_tri(const T1 &A, const T2 &b) {
  * @return x = A^-1 .
  * @throws std::domain_error if A is not square
  */
-template <Eigen::UpLoType TriView, typename T, require_eigen_t<T> * = nullptr,
-          require_not_vt_same<double, T> * = nullptr>
+template <Eigen::UpLoType TriView, typename T, require_eigen_t<T> * = nullptr>
 inline plain_type_t<T> mdivide_left_tri(const T &A) {
   check_square("mdivide_left_tri", "A", A);
   if (A.rows() == 0) {
@@ -69,85 +64,6 @@ inline plain_type_t<T> mdivide_left_tri(const T &A) {
   return b;
 }
 
-/**
- * Returns the solution of the system Ax=b when A is triangular
- * and A and b are matrices of doubles.
- *
- * @tparam TriView Specifies whether A is upper (Eigen::Upper)
- * or lower triangular (Eigen::Lower).
- * @tparam T1 type of the triangular matrix
- * @tparam T2 type of the right-hand side matrix or vector
- *
- * @param A Triangular matrix.
- * @param b Right hand side matrix or vector.
- * @return x = A^-1 b, solution of the linear system.
- * @throws std::domain_error if A is not square or the rows of b don't
- * match the size of A.
- */
-template <Eigen::UpLoType TriView, typename T1, typename T2,
-          require_all_eigen_t<T1, T2> * = nullptr,
-          require_all_vt_same<double, T1, T2> * = nullptr>
-inline Eigen::Matrix<double, T1::RowsAtCompileTime, T2::ColsAtCompileTime>
-mdivide_left_tri(const T1 &A, const T2 &b) {
-  check_square("mdivide_left_tri", "A", A);
-  check_multiplicable("mdivide_left_tri", "A", A, "b", b);
-  if (A.rows() == 0) {
-    return {0, b.cols()};
-  }
-
-#ifdef STAN_OPENCL
-  if (A.rows()
-      >= opencl_context.tuning_opts().tri_inverse_size_worth_transfer) {
-    matrix_cl<double> A_cl(A, from_eigen_uplo_type(TriView));
-    matrix_cl<double> b_cl(b);
-    matrix_cl<double> A_inv_cl = tri_inverse(A_cl);
-    matrix_cl<double> C_cl = A_inv_cl * b_cl;
-    return from_matrix_cl(C_cl);
-  } else {
-#endif
-    return to_ref(A).template triangularView<TriView>().solve(b);
-#ifdef STAN_OPENCL
-  }
-#endif
-}
-
-/**
- * Returns the solution of the system Ax=b when A is triangular, b=I and
- * both are matrices of doubles.
- *
- * @tparam TriView Specifies whether A is upper (Eigen::Upper)
- * or lower triangular (Eigen::Lower).
- * @tparam T type of the matrix
- *
- * @param A Triangular matrix.
- * @return x = A^-1 .
- * @throws std::domain_error if A is not square
- */
-template <Eigen::UpLoType TriView, typename T, require_eigen_t<T> * = nullptr,
-          require_vt_same<double, T> * = nullptr>
-inline plain_type_t<T> mdivide_left_tri(const T &A) {
-  check_square("mdivide_left_tri", "A", A);
-  if (A.rows() == 0) {
-    return {};
-  }
-
-  const int n = A.rows();
-#ifdef STAN_OPENCL
-  if (A.rows()
-      >= opencl_context.tuning_opts().tri_inverse_size_worth_transfer) {
-    matrix_cl<double> A_cl(A, from_eigen_uplo_type(TriView));
-    A_cl = tri_inverse(A_cl);
-    return from_matrix_cl(A_cl);
-  } else {
-#endif
-    plain_type_t<T> b = plain_type_t<T>::Identity(n, n);
-    A.template triangularView<TriView>().solveInPlace(b);
-    return b;
-#ifdef STAN_OPENCL
-  }
-#endif
-}
-
 }  // namespace math
 }  // namespace stan
 
diff --git a/stan/math/prim/fun/mdivide_right_tri.hpp b/stan/math/prim/fun/mdivide_right_tri.hpp
index 8f12116d785..479f9e1417a 100644
--- a/stan/math/prim/fun/mdivide_right_tri.hpp
+++ b/stan/math/prim/fun/mdivide_right_tri.hpp
@@ -5,9 +5,6 @@
 #include <stan/math/prim/err.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
 #include <stan/math/prim/fun/to_ref.hpp>
-#ifdef STAN_OPENCL
-#include <stan/math/opencl/prim.hpp>
-#endif
 
 namespace stan {
 namespace math {
@@ -28,8 +25,7 @@ namespace math {
  * match the size of A.
  */
 template <Eigen::UpLoType TriView, typename EigMat1, typename EigMat2,
-          require_all_eigen_t<EigMat1, EigMat2>* = nullptr,
-          require_any_not_st_same<double, EigMat1, EigMat2>* = nullptr>
+          require_all_eigen_t<EigMat1, EigMat2>* = nullptr>
 inline Eigen::Matrix<return_type_t<EigMat1, EigMat2>,
                      EigMat1::RowsAtCompileTime, EigMat2::ColsAtCompileTime>
 mdivide_right_tri(const EigMat1& b, const EigMat2& A) {
@@ -57,54 +53,6 @@ mdivide_right_tri(const EigMat1& b, const EigMat2& A) {
       .transpose();
 }
 
-/**
- * Returns the solution of the system xA=b when A is triangular
- * and A and b are matrices of doubles.
- *
- * @tparam TriView Specifies whether A is upper (Eigen::Upper)
- * or lower triangular (Eigen::Lower).
- * @tparam EigMat1 type of the right-hand side matrix or vector
- * @tparam EigMat2 type of the triangular matrix
- *
- * @param A Triangular matrix.  Specify upper or lower with TriView
- * being Eigen::Upper or Eigen::Lower.
- * @param b Right hand side matrix or vector.
- * @return x = b A^-1, solution of the linear system.
- * @throws std::domain_error if A is not square or the rows of b don't
- * match the size of A.
- */
-template <Eigen::UpLoType TriView, typename EigMat1, typename EigMat2,
-          require_all_eigen_t<EigMat1, EigMat2>* = nullptr,
-          require_all_st_same<double, EigMat1, EigMat2>* = nullptr>
-inline Eigen::Matrix<double, EigMat1::RowsAtCompileTime,
-                     EigMat2::ColsAtCompileTime>
-mdivide_right_tri(const EigMat1& b, const EigMat2& A) {
-  check_square("mdivide_right_tri", "A", A);
-  check_multiplicable("mdivide_right_tri", "b", b, "A", A);
-  if (A.rows() == 0) {
-    return {b.rows(), 0};
-  }
-
-#ifdef STAN_OPENCL
-  if (A.rows()
-      >= opencl_context.tuning_opts().tri_inverse_size_worth_transfer) {
-    matrix_cl<double> A_cl(A, from_eigen_uplo_type(TriView));
-    matrix_cl<double> b_cl(b);
-    matrix_cl<double> A_inv_cl = tri_inverse(A_cl);
-    matrix_cl<double> C_cl = b_cl * A_inv_cl;
-    return from_matrix_cl(C_cl);
-  } else {
-#endif
-    return to_ref(A)
-        .template triangularView<TriView>()
-        .transpose()
-        .solve(b.transpose())
-        .transpose();
-#ifdef STAN_OPENCL
-  }
-#endif
-}
-
 }  // namespace math
 }  // namespace stan
 
diff --git a/stan/math/prim/fun/multiply.hpp b/stan/math/prim/fun/multiply.hpp
index 6c917985bc7..92fc6b91cb2 100644
--- a/stan/math/prim/fun/multiply.hpp
+++ b/stan/math/prim/fun/multiply.hpp
@@ -5,9 +5,6 @@
 #include <stan/math/prim/err.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
 #include <stan/math/prim/fun/dot_product.hpp>
-#ifdef STAN_OPENCL
-#include <stan/math/opencl/prim.hpp>
-#endif
 #include <type_traits>
 
 namespace stan {
@@ -71,8 +68,6 @@ inline auto multiply(Scal c, const Mat& m) {
  */
 template <typename Mat1, typename Mat2,
           require_all_eigen_vt<std::is_arithmetic, Mat1, Mat2>* = nullptr,
-          require_any_not_same_t<double, value_type_t<Mat1>,
-                                 value_type_t<Mat2>>* = nullptr,
           require_not_eigen_row_and_col_t<Mat1, Mat2>* = nullptr>
 inline auto multiply(const Mat1& m1, const Mat2& m2) {
   check_size_match("multiply", "Columns of m1", m1.cols(), "Rows of m2",
@@ -84,46 +79,6 @@ inline auto multiply(const Mat1& m1, const Mat2& m2) {
 #endif
 }
 
-/**
- * Return the product of the specified matrices. The number of
- * columns in the first matrix must be the same as the number of rows
- * in the second matrix. If scalar of matrices is \c double OpenCL
- * implementation can be used.
- *
- * @tparam Mat1 type of the first matrix or expression
- * @tparam Mat2 type of the second matrix or expression
- *
- * @param m1 first matrix or expression
- * @param m2 second matrix or expression
- * @return the product of the first and second matrices
- * @throw <code>std::invalid_argument</code> if the number of columns of m1 does
- * not match the number of rows of m2.
- */
-template <typename Mat1, typename Mat2,
-          require_all_eigen_t<Mat1, Mat2>* = nullptr,
-          require_all_same_t<double, value_type_t<Mat1>,
-                             value_type_t<Mat2>>* = nullptr,
-          require_not_eigen_row_and_col_t<Mat1, Mat2>* = nullptr>
-inline auto multiply(const Mat1& m1, const Mat2& m2)
-    -> decltype((m1 * m2).eval()) {
-  check_multiplicable("multiply", "m1", m1, "m2", m2);
-
-#ifdef STAN_OPENCL
-  if (m1.rows() * m1.cols() * m2.cols()
-      > opencl_context.tuning_opts().multiply_dim_prod_worth_transfer) {
-    matrix_cl<double> m1_cl(m1);
-    matrix_cl<double> m2_cl(m2);
-    matrix_cl<double> m3_cl = m1_cl * m2_cl;
-    return from_matrix_cl<Mat1::RowsAtCompileTime, Mat2::ColsAtCompileTime>(
-        m3_cl);
-  } else {
-    return (m1 * m2).eval();
-  }
-#else
-  return (m1 * m2).eval();
-#endif
-}
-
 /**
  * Return the scalar product of the specified row vector and
  * specified column vector.  The return is the same as the dot
diff --git a/stan/math/prim/functor/reduce_sum.hpp b/stan/math/prim/functor/reduce_sum.hpp
index d0d4fe34799..868de060f32 100644
--- a/stan/math/prim/functor/reduce_sum.hpp
+++ b/stan/math/prim/functor/reduce_sum.hpp
@@ -204,9 +204,9 @@ inline auto reduce_sum(Vec&& vmapped, int grainsize, std::ostream* msgs,
 
 #ifdef STAN_THREADS
   return internal::reduce_sum_impl<ReduceFunction, void, return_type, Vec,
-                                   Args...>()(std::forward<Vec>(vmapped), true,
-                                              grainsize, msgs,
-                                              std::forward<Args>(args)...);
+                                   ref_type_t<Args&&>...>()(
+      std::forward<Vec>(vmapped), true, grainsize, msgs,
+      std::forward<Args>(args)...);
 #else
   if (vmapped.empty()) {
     return return_type(0.0);
diff --git a/stan/math/prim/mat/err/check_ldlt_factor.hpp b/stan/math/prim/mat/err/check_ldlt_factor.hpp
index cf5b3e4214a..0f9b7e580de 100644
--- a/stan/math/prim/mat/err/check_ldlt_factor.hpp
+++ b/stan/math/prim/mat/err/check_ldlt_factor.hpp
@@ -1,2 +1 @@
 #include "../../../prim/err/check_ldlt_factor.hpp"
-
diff --git a/stan/math/prim/mat/err/check_symmetric.hpp b/stan/math/prim/mat/err/check_symmetric.hpp
index c8d34d5e9b7..1ec4b57b2aa 100644
--- a/stan/math/prim/mat/err/check_symmetric.hpp
+++ b/stan/math/prim/mat/err/check_symmetric.hpp
@@ -1,2 +1 @@
 #include "../../../prim/err/check_symmetric.hpp"
-
diff --git a/stan/math/prim/mat/fun/log_determinant_ldlt.hpp b/stan/math/prim/mat/fun/log_determinant_ldlt.hpp
index a3966d0ce37..937275bfb27 100644
--- a/stan/math/prim/mat/fun/log_determinant_ldlt.hpp
+++ b/stan/math/prim/mat/fun/log_determinant_ldlt.hpp
@@ -1,2 +1 @@
 #include "../../fun/log_determinant_ldlt.hpp"
-
diff --git a/stan/math/prim/mat/fun/trace_inv_quad_form_ldlt.hpp b/stan/math/prim/mat/fun/trace_inv_quad_form_ldlt.hpp
index 79a1c88ce45..f15531ef7b6 100644
--- a/stan/math/prim/mat/fun/trace_inv_quad_form_ldlt.hpp
+++ b/stan/math/prim/mat/fun/trace_inv_quad_form_ldlt.hpp
@@ -1,2 +1 @@
 #include "../../fun/trace_inv_quad_form_ldlt.hpp"
-
diff --git a/stan/math/prim/scal/err/check_finite.hpp b/stan/math/prim/scal/err/check_finite.hpp
index 30cc3048f41..c1749be0b9c 100644
--- a/stan/math/prim/scal/err/check_finite.hpp
+++ b/stan/math/prim/scal/err/check_finite.hpp
@@ -1,2 +1 @@
 #include "../../../prim/err/check_finite.hpp"
-
diff --git a/stan/math/prim/scal/err/check_not_nan.hpp b/stan/math/prim/scal/err/check_not_nan.hpp
index 53a95cfe67a..29c784b5341 100644
--- a/stan/math/prim/scal/err/check_not_nan.hpp
+++ b/stan/math/prim/scal/err/check_not_nan.hpp
@@ -1,2 +1 @@
 #include "../../../prim/err/check_not_nan.hpp"
-
diff --git a/stan/math/prim/scal/err/check_size_match.hpp b/stan/math/prim/scal/err/check_size_match.hpp
index dfea07e8a86..9eed3c01313 100644
--- a/stan/math/prim/scal/err/check_size_match.hpp
+++ b/stan/math/prim/scal/err/check_size_match.hpp
@@ -1,2 +1 @@
 #include "../../../prim/err/check_size_match.hpp"
-
diff --git a/stan/math/prim/scal/meta/include_summand.hpp b/stan/math/prim/scal/meta/include_summand.hpp
index 3310e6c7761..77d7170e0f8 100644
--- a/stan/math/prim/scal/meta/include_summand.hpp
+++ b/stan/math/prim/scal/meta/include_summand.hpp
@@ -1,2 +1 @@
 #include "../../meta/include_summand.hpp"
-
diff --git a/stan/math/prim/scal/meta/length_mvt.hpp b/stan/math/prim/scal/meta/length_mvt.hpp
index daa727f6065..b836f0d8d8d 100644
--- a/stan/math/prim/scal/meta/length_mvt.hpp
+++ b/stan/math/prim/scal/meta/length_mvt.hpp
@@ -25,4 +25,3 @@ size_t length_mvt(const T& /* unused */) {
 
 }  // namespace stan
 #endif
-
diff --git a/stan/math/prim/scal/meta/max_size_mvt.hpp b/stan/math/prim/scal/meta/max_size_mvt.hpp
index 193851fbf3c..013d885b5b0 100644
--- a/stan/math/prim/scal/meta/max_size_mvt.hpp
+++ b/stan/math/prim/scal/meta/max_size_mvt.hpp
@@ -32,4 +32,3 @@ size_t max_size_mvt(const T1& x1, const T2& x2, const T3& x3, const T4& x4) {
 
 }  // namespace stan
 #endif
-
diff --git a/stan/math/prim/scal/meta/return_type.hpp b/stan/math/prim/scal/meta/return_type.hpp
index 50ced40f59b..cbb69296df9 100644
--- a/stan/math/prim/scal/meta/return_type.hpp
+++ b/stan/math/prim/scal/meta/return_type.hpp
@@ -1,2 +1 @@
 #include "../../meta/return_type.hpp"
-
diff --git a/stan/math/rev/core/deep_copy_vars.hpp b/stan/math/rev/core/deep_copy_vars.hpp
index 5eb1b3d8542..06561d1a9e0 100644
--- a/stan/math/rev/core/deep_copy_vars.hpp
+++ b/stan/math/rev/core/deep_copy_vars.hpp
@@ -20,7 +20,7 @@ namespace math {
  *  Otherwise it will be moved.
  */
 template <typename Arith, typename = require_arithmetic_t<scalar_type_t<Arith>>>
-inline decltype(auto) deep_copy_vars(Arith&& arg) {
+inline Arith deep_copy_vars(Arith&& arg) {
   return std::forward<Arith>(arg);
 }
 
diff --git a/stan/math/rev/core/var.hpp b/stan/math/rev/core/var.hpp
index a6002726358..5a4bcb1290d 100644
--- a/stan/math/rev/core/var.hpp
+++ b/stan/math/rev/core/var.hpp
@@ -671,6 +671,20 @@ class var_value<
     return var_sub(new vari_sub(vi_->col(i)));
   }
 
+  /**
+   * View a `matrix_cl` as a column vector.
+   */
+  inline auto as_column_vector_or_scalar() const {
+    using vari_sub = decltype(vi_->as_column_vector_or_scalar());
+    using var_sub = var_value<value_type_t<vari_sub>>;
+    return var_sub(new vari_sub(vi_->as_column_vector_or_scalar()));
+  }
+  inline auto as_column_vector_or_scalar() {
+    using vari_sub = decltype(vi_->as_column_vector_or_scalar());
+    using var_sub = var_value<value_type_t<vari_sub>>;
+    return var_sub(new vari_sub(vi_->as_column_vector_or_scalar()));
+  }
+
   /**
    * View element of eigen matrices. This creates a new
    * vari_value<double> so unlike the other views this subset will not
diff --git a/stan/math/rev/fun/cholesky_decompose.hpp b/stan/math/rev/fun/cholesky_decompose.hpp
index 87dba049985..35a04f6d566 100644
--- a/stan/math/rev/fun/cholesky_decompose.hpp
+++ b/stan/math/rev/fun/cholesky_decompose.hpp
@@ -12,11 +12,6 @@
 #include <stan/math/prim/err/check_pos_definite.hpp>
 #include <stan/math/prim/err/check_square.hpp>
 #include <stan/math/prim/err/check_symmetric.hpp>
-
-#ifdef STAN_OPENCL
-#include <stan/math/opencl/prim.hpp>
-#endif
-
 #include <algorithm>
 #include <vector>
 
@@ -55,22 +50,24 @@ inline auto unblocked_cholesky_lambda(T1& L_A, T2& L, T3& A) {
     const size_t N = A.rows();
     // Algorithm is in rowmajor so we make the adjoint copy rowmajor
     Eigen::Matrix<double, -1, -1, Eigen::RowMajor> adjL(L.rows(), L.cols());
+    Eigen::MatrixXd adjA = Eigen::MatrixXd::Zero(L.rows(), L.cols());
     adjL.template triangularView<Eigen::Lower>() = L.adj();
     for (int i = N - 1; i >= 0; --i) {
       for (int j = i; j >= 0; --j) {
         if (i == j) {
-          A.adj()(i, j) = 0.5 * adjL.coeff(i, j) / L_A.coeff(i, j);
+          adjA.coeffRef(i, j) = 0.5 * adjL.coeff(i, j) / L_A.coeff(i, j);
         } else {
-          A.adj()(i, j) = adjL.coeff(i, j) / L_A.coeff(j, j);
+          adjA.coeffRef(i, j) = adjL.coeff(i, j) / L_A.coeff(j, j);
           adjL.coeffRef(j, j)
               -= adjL.coeff(i, j) * L_A.coeff(i, j) / L_A.coeff(j, j);
         }
         for (int k = j - 1; k >= 0; --k) {
-          adjL.coeffRef(i, k) -= A.adj().coeff(i, j) * L_A.coeff(j, k);
-          adjL.coeffRef(j, k) -= A.adj().coeff(i, j) * L_A.coeff(i, k);
+          adjL.coeffRef(i, k) -= adjA.coeff(i, j) * L_A.coeff(j, k);
+          adjL.coeffRef(j, k) -= adjA.coeff(i, j) * L_A.coeff(i, k);
         }
       }
     }
+    A.adj() += adjA;
   };
 }
 
@@ -87,7 +84,7 @@ inline auto cholesky_lambda(T1& L_A, T2& L, T3& A) {
     using Eigen::Lower;
     using Eigen::StrictlyUpper;
     using Eigen::Upper;
-    Eigen::MatrixXd L_adj(L.rows(), L.cols());
+    Eigen::MatrixXd L_adj = Eigen::MatrixXd::Zero(L.rows(), L.cols());
     L_adj.template triangularView<Eigen::Lower>() = L.adj();
     const int M_ = L_A.rows();
     int block_size_ = std::max(M_ / 8, 8);
@@ -119,81 +116,9 @@ inline auto cholesky_lambda(T1& L_A, T2& L, T3& A) {
       R_adj.noalias() -= D_adj.template selfadjointView<Lower>() * R;
       D_adj.diagonal() *= 0.5;
     }
-    A.adj().template triangularView<Eigen::Lower>() = L_adj;
-  };
-}
-
-#ifdef STAN_OPENCL
-/**
- * Reverse mode differentiation for Cholesky using OpenCL
- * Reverse mode differentiation algorithm reference:
- *
- * Iain Murray: Differentiation of the Cholesky decomposition, 2016.
- *
- */
-template <typename AMat, typename LVari>
-inline auto opencl_cholesky_lambda(AMat& arena_A, LVari& vari_L) {
-  return [arena_A, vari_L]() mutable {
-    const int M_ = arena_A.rows();
-    const int packed_size = M_ * (M_ + 1) / 2;
-    Eigen::Map<Eigen::Matrix<vari*, Eigen::Dynamic, 1>> L_cpu(
-        vari_L, M_ * (M_ + 1) / 2);
-    Eigen::VectorXd L_val_cpu = L_cpu.val();
-    Eigen::VectorXd L_adj_cpu = L_cpu.adj();
-    matrix_cl<double> L_val = packed_copy<matrix_cl_view::Lower>(L_val_cpu, M_);
-    matrix_cl<double> L_adj = packed_copy<matrix_cl_view::Lower>(L_adj_cpu, M_);
-    int block_size
-        = M_
-          / std::max(1,
-                     opencl_context.tuning_opts().cholesky_rev_block_partition);
-    block_size = std::max(block_size, 8);
-    block_size = std::min(
-        block_size, opencl_context.tuning_opts().cholesky_rev_min_block_size);
-    // The following is an OpenCL implementation of
-    // the chain() function from the cholesky_block
-    // vari class implementation
-    for (int k = M_; k > 0; k -= block_size) {
-      const int j = std::max(0, k - block_size);
-      const int k_j_ind = k - j;
-      const int m_k_ind = M_ - k;
-
-      auto&& R_val = block_zero_based(L_val, j, 0, k_j_ind, j);
-      auto&& R_adj = block_zero_based(L_adj, j, 0, k_j_ind, j);
-      matrix_cl<double> D_val = block_zero_based(L_val, j, j, k_j_ind, k_j_ind);
-      matrix_cl<double> D_adj = block_zero_based(L_adj, j, j, k_j_ind, k_j_ind);
-      auto&& B_val = block_zero_based(L_val, k, 0, m_k_ind, j);
-      auto&& B_adj = block_zero_based(L_adj, k, 0, m_k_ind, j);
-      auto&& C_val = block_zero_based(L_val, k, j, m_k_ind, k_j_ind);
-      auto&& C_adj = block_zero_based(L_adj, k, j, m_k_ind, k_j_ind);
-
-      C_adj = C_adj * tri_inverse(D_val);
-      B_adj = B_adj - C_adj * R_val;
-      D_adj = D_adj - transpose(C_adj) * C_val;
-
-      D_adj = transpose(D_val) * D_adj;
-      D_adj.triangular_transpose<TriangularMapCL::LowerToUpper>();
-      D_val = transpose(tri_inverse(D_val));
-      D_adj = D_val * transpose(D_val * D_adj);
-      D_adj.triangular_transpose<TriangularMapCL::LowerToUpper>();
-
-      R_adj = R_adj - transpose(C_adj) * B_val - D_adj * R_val;
-      diagonal(D_adj) = diagonal(D_adj) * 0.5;
-
-      block_zero_based(L_adj, j, j, k_j_ind, k_j_ind) = D_adj;
-    }
-    L_adj.view(matrix_cl_view::Lower);
-    std::vector<double> L_adj_cpu_res = packed_copy(L_adj);
-    int pos = 0;
-    for (Eigen::Index j = 0; j < arena_A.rows(); ++j) {
-      for (Eigen::Index i = j; i < arena_A.rows(); ++i) {
-        arena_A.coeffRef(i, j)->adj_ += L_adj_cpu_res[pos];
-        pos++;
-      }
-    }
+    A.adj().template triangularView<Eigen::Lower>() += L_adj;
   };
 }
-
-#endif
 }  // namespace internal
 
 /**
@@ -212,13 +137,11 @@ inline auto cholesky_decompose(const EigMat& A) {
   check_square("cholesky_decompose", "A", A);
   arena_t<EigMat> arena_A = A;
   arena_t<Eigen::Matrix<double, -1, -1>> L_A(arena_A.val());
-#ifdef STAN_OPENCL
-  L_A = cholesky_decompose(L_A);
-#else
+
   check_symmetric("cholesky_decompose", "A", A);
   Eigen::LLT<Eigen::Ref<Eigen::MatrixXd>, Eigen::Lower> L_factor(L_A);
   check_pos_definite("cholesky_decompose", "m", L_factor);
-#endif
+
   L_A.template triangularView<Eigen::StrictlyUpper>().setZero();
   // looping gradient calcs faster for small matrices compared to
   // cholesky_block
@@ -228,31 +151,8 @@ inline auto cholesky_decompose(const EigMat& A) {
     internal::initialize_return(L, L_A, dummy);
     reverse_pass_callback(internal::unblocked_cholesky_lambda(L_A, L, arena_A));
   } else {
-#ifdef STAN_OPENCL
-    if (L_A.rows()
-        > opencl_context.tuning_opts().cholesky_size_worth_transfer) {
-      vari** vari_L = ChainableStack::instance_->memalloc_.alloc_array<vari*>(
-          arena_A.rows() * (arena_A.rows() + 1) / 2);
-      size_t pos = 0;
-      for (Eigen::Index j = 0; j < arena_A.rows(); ++j) {
-        for (Eigen::Index i = j; i < arena_A.rows(); ++i) {
-          vari_L[pos] = new vari(L_A.coeffRef(i, j), false);
-          L.coeffRef(i, j).vi_ = vari_L[pos];
-          ++pos;
-        }
-        for (Eigen::Index k = 0; k < j; ++k) {
-          L.coeffRef(k, j).vi_ = dummy;
-        }
-      }
-      reverse_pass_callback(internal::opencl_cholesky_lambda(arena_A, vari_L));
-    } else {
-      internal::initialize_return(L, L_A, dummy);
-      reverse_pass_callback(internal::cholesky_lambda(L_A, L, arena_A));
-    }
-#else
     internal::initialize_return(L, L_A, dummy);
     reverse_pass_callback(internal::cholesky_lambda(L_A, L, arena_A));
-#endif
   }
   return plain_type_t<EigMat>(L);
 }
diff --git a/stan/math/version.hpp b/stan/math/version.hpp
index 8b8afaf15ba..6c23b8a9e28 100644
--- a/stan/math/version.hpp
+++ b/stan/math/version.hpp
@@ -13,7 +13,7 @@
 
 #define STAN_MATH_MAJOR 4
 #define STAN_MATH_MINOR 0
-#define STAN_MATH_PATCH 0
+#define STAN_MATH_PATCH 1
 
 namespace stan {
 namespace math {
diff --git a/test/unit/math/opencl/kernel_generator/as_column_vector_or_scalar_test.cpp b/test/unit/math/opencl/kernel_generator/as_column_vector_or_scalar_test.cpp
new file mode 100644
index 00000000000..f18c6811048
--- /dev/null
+++ b/test/unit/math/opencl/kernel_generator/as_column_vector_or_scalar_test.cpp
@@ -0,0 +1,175 @@
+#ifdef STAN_OPENCL
+#include <stan/math/prim/fun/Eigen.hpp>
+#include <stan/math/opencl/kernel_generator.hpp>
+#include <stan/math/opencl/matrix_cl.hpp>
+#include <stan/math/opencl/copy.hpp>
+#include <test/unit/math/opencl/kernel_generator/reference_kernel.hpp>
+#include <stan/math.hpp>
+#include <test/unit/util.hpp>
+#include <gtest/gtest.h>
+#include <string>
+
+TEST(KernelGenerator, as_column_vector_or_scalar_errors) {
+  using stan::math::as_column_vector_or_scalar;
+  stan::math::matrix_cl<double> m(7, 1);
+  stan::math::matrix_cl<double> n(1, 7);
+  stan::math::matrix_cl<double> j(7, 7);
+
+  auto block_m = stan::math::block_zero_based(m, 0, 0, 7, 1);
+  auto block_n = stan::math::block_zero_based(n, 0, 0, 1, 7);
+
+  EXPECT_THROW(as_column_vector_or_scalar(j), std::invalid_argument);
+  EXPECT_THROW(block_n = as_column_vector_or_scalar(n), std::invalid_argument);
+  EXPECT_THROW(block_n = as_column_vector_or_scalar(m), std::invalid_argument);
+  EXPECT_NO_THROW(block_m = as_column_vector_or_scalar(m));
+  EXPECT_NO_THROW(as_column_vector_or_scalar(block_m) = m);
+  EXPECT_NO_THROW(as_column_vector_or_scalar(block_m)
+                  = as_column_vector_or_scalar(m));
+  EXPECT_NO_THROW(block_m = as_column_vector_or_scalar(n));
+  EXPECT_NO_THROW(as_column_vector_or_scalar(block_n) = m);
+  EXPECT_NO_THROW(as_column_vector_or_scalar(block_n)
+                  = as_column_vector_or_scalar(n));
+  auto a = as_column_vector_or_scalar(m);
+  EXPECT_NO_THROW(a = a);
+  EXPECT_NO_THROW(a = a + 1);
+}
+
+TEST(KernelGenerator, as_column_vector_or_scalar_vector_test) {
+  using Eigen::MatrixXd;
+  using Eigen::VectorXd;
+  using stan::math::matrix_cl;
+  VectorXd m(6, 1);
+  m << 1.1, 1.2, 1.3, 1.4, 1.5, 1.6;
+
+  matrix_cl<double> m_cl(m);
+  auto tmp = stan::math::as_column_vector_or_scalar(m_cl);
+  matrix_cl<double> res_cl = tmp;
+
+  MatrixXd res = stan::math::from_matrix_cl(res_cl);
+  MatrixXd correct = stan::math::as_column_vector_or_scalar(m);
+  EXPECT_MATRIX_NEAR(correct, res, 1e-9);
+}
+
+TEST(KernelGenerator, as_column_vector_or_scalar_row_vector_test) {
+  using Eigen::MatrixXd;
+  using Eigen::RowVectorXd;
+  using stan::math::matrix_cl;
+  RowVectorXd m(6);
+  m << 1.1, 1.2, 1.3, 1.4, 1.5, 1.6;
+
+  matrix_cl<double> m_cl(m);
+  auto tmp = stan::math::as_column_vector_or_scalar(m_cl);
+  matrix_cl<double> res_cl = tmp;
+
+  MatrixXd res = stan::math::from_matrix_cl(res_cl);
+  MatrixXd correct = stan::math::as_column_vector_or_scalar(m);
+  EXPECT_MATRIX_NEAR(correct, res, 1e-9);
+}
+
+TEST(KernelGenerator, double_as_column_vector_or_scalar_test) {
+  using Eigen::MatrixXd;
+  using Eigen::VectorXd;
+  using stan::math::matrix_cl;
+  VectorXd m(6);
+  m << 1.1, 1.2, 1.3, 1.4, 1.5, 1.6;
+
+  matrix_cl<double> m_cl(m);
+  auto tmp = stan::math::as_column_vector_or_scalar(
+      stan::math::as_column_vector_or_scalar(m_cl));
+  matrix_cl<double> res_cl = tmp;
+
+  MatrixXd res = stan::math::from_matrix_cl(res_cl);
+  MatrixXd correct = m;
+  EXPECT_MATRIX_NEAR(correct, res, 1e-9);
+}
+
+TEST(KernelGenerator, double_as_column_vector_or_scalar_accepts_lvalue_test) {
+  using Eigen::MatrixXd;
+  using Eigen::RowVectorXd;
+  using stan::math::matrix_cl;
+  RowVectorXd m(6);
+  m << 1.1, 1.2, 1.3, 1.4, 1.5, 1.6;
+
+  matrix_cl<double> m_cl(m);
+  auto tmp2 = stan::math::as_column_vector_or_scalar(m_cl);
+  auto tmp = stan::math::as_column_vector_or_scalar(tmp2);
+  matrix_cl<double> res_cl = tmp;
+
+  MatrixXd res = stan::math::from_matrix_cl(res_cl);
+  MatrixXd correct = stan::math::as_column_vector_or_scalar(m);
+  EXPECT_MATRIX_NEAR(correct, res, 1e-9);
+}
+
+TEST(KernelGenerator, as_column_vector_or_scalar_vector_block_test) {
+  using Eigen::MatrixXd;
+  using stan::math::matrix_cl;
+  MatrixXd m(5, 5);
+  m << 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+      21, 22, 23, 24, 25;
+
+  matrix_cl<double> m_cl(m);
+  auto tmp2 = stan::math::block_zero_based(m_cl, 1, 2, 3, 1);
+  auto tmp = stan::math::as_column_vector_or_scalar(tmp2);
+  matrix_cl<double> res_cl = tmp;
+
+  MatrixXd res = stan::math::from_matrix_cl(res_cl);
+  MatrixXd correct
+      = stan::math::as_column_vector_or_scalar(m.col(2).segment(1, 3));
+  EXPECT_MATRIX_NEAR(correct, res, 1e-9);
+}
+
+TEST(KernelGenerator, as_column_vector_or_scalar_row_vector_block_test) {
+  using Eigen::MatrixXd;
+  using stan::math::matrix_cl;
+  MatrixXd m(5, 5);
+  m << 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+      21, 22, 23, 24, 25;
+
+  matrix_cl<double> m_cl(m);
+  auto tmp2 = stan::math::block_zero_based(m_cl, 2, 1, 1, 3);
+  auto tmp = stan::math::as_column_vector_or_scalar(tmp2);
+  matrix_cl<double> res_cl = tmp;
+
+  MatrixXd res = stan::math::from_matrix_cl(res_cl);
+  MatrixXd correct
+      = stan::math::as_column_vector_or_scalar(m.row(2).segment(1, 3));
+  EXPECT_MATRIX_NEAR(correct, res, 1e-9);
+}
+
+TEST(KernelGenerator, lhs_as_column_vector_or_scalar_vector_plus_eq_test) {
+  using Eigen::MatrixXd;
+  using Eigen::VectorXd;
+  using stan::math::as_column_vector_or_scalar;
+  using stan::math::matrix_cl;
+  VectorXd m1(6);
+  m1 << 1, 2, 3, 4, 5, 6;
+  VectorXd correct = m1.array() + 1;
+
+  matrix_cl<double> m1_cl(m1);
+
+  as_column_vector_or_scalar(m1_cl) += 1;
+
+  MatrixXd res = stan::math::from_matrix_cl(m1_cl);
+
+  EXPECT_MATRIX_NEAR(res, correct, 1e-9);
+}
+
+TEST(KernelGenerator, lhs_as_column_vector_or_scalar_row_vector_plus_eq_test) {
+  using Eigen::MatrixXd;
+  using Eigen::RowVectorXd;
+  using stan::math::as_column_vector_or_scalar;
+  using stan::math::matrix_cl;
+  RowVectorXd m1(6);
+  m1 << 1, 2, 3, 4, 5, 6;
+  RowVectorXd correct = m1.array() + 1;
+
+  matrix_cl<double> m1_cl(m1);
+
+  as_column_vector_or_scalar(m1_cl) += 1;
+
+  MatrixXd res = stan::math::from_matrix_cl(m1_cl);
+
+  EXPECT_MATRIX_NEAR(res, correct, 1e-9);
+}
+
+#endif
diff --git a/test/unit/math/opencl/rev/bernoulli_logit_lpmf_test.cpp b/test/unit/math/opencl/rev/bernoulli_logit_lpmf_test.cpp
index c839378ee1f..b71ac6c34f3 100644
--- a/test/unit/math/opencl/rev/bernoulli_logit_lpmf_test.cpp
+++ b/test/unit/math/opencl/rev/bernoulli_logit_lpmf_test.cpp
@@ -58,6 +58,10 @@ TEST(ProbDistributionsBernoulliLogit, opencl_matches_cpu_small) {
                                                 theta);
   stan::math::test::compare_cpu_opencl_prim_rev(
       bernoulli_logit_lpmf_functor_propto, n, theta);
+  stan::math::test::compare_cpu_opencl_prim_rev(bernoulli_logit_lpmf_functor, n,
+                                                theta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      bernoulli_logit_lpmf_functor_propto, n, theta.transpose().eval());
 }
 
 TEST(ProbDistributionsBernoulliLogit, opencl_broadcast_n) {
@@ -99,6 +103,10 @@ TEST(ProbDistributionsBernoulliLogit, opencl_matches_cpu_big) {
                                                 theta);
   stan::math::test::compare_cpu_opencl_prim_rev(
       bernoulli_logit_lpmf_functor_propto, n, theta);
+  stan::math::test::compare_cpu_opencl_prim_rev(bernoulli_logit_lpmf_functor, n,
+                                                theta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      bernoulli_logit_lpmf_functor_propto, n, theta.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/bernoulli_lpmf_test.cpp b/test/unit/math/opencl/rev/bernoulli_lpmf_test.cpp
index c6f16e9aa90..2dd8ae41530 100644
--- a/test/unit/math/opencl/rev/bernoulli_lpmf_test.cpp
+++ b/test/unit/math/opencl/rev/bernoulli_lpmf_test.cpp
@@ -57,6 +57,10 @@ TEST(ProbDistributionsBernoulli, opencl_matches_cpu_small) {
                                                 theta);
   stan::math::test::compare_cpu_opencl_prim_rev(bernoulli_lpmf_functor_propto,
                                                 n, theta);
+  stan::math::test::compare_cpu_opencl_prim_rev(bernoulli_lpmf_functor, n,
+                                                theta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(bernoulli_lpmf_functor_propto,
+                                                n, theta.transpose().eval());
 }
 
 TEST(ProbDistributionsBernoulli, opencl_broadcast_n) {
@@ -98,6 +102,10 @@ TEST(ProbDistributionsBernoulli, opencl_matches_cpu_big) {
                                                 theta);
   stan::math::test::compare_cpu_opencl_prim_rev(bernoulli_lpmf_functor_propto,
                                                 n, theta);
+  stan::math::test::compare_cpu_opencl_prim_rev(bernoulli_lpmf_functor, n,
+                                                theta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(bernoulli_lpmf_functor_propto,
+                                                n, theta.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/beta_lpdf_test.cpp b/test/unit/math/opencl/rev/beta_lpdf_test.cpp
index a3c28d1ff16..dd0c3fce94e 100644
--- a/test/unit/math/opencl/rev/beta_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/beta_lpdf_test.cpp
@@ -98,6 +98,12 @@ TEST(ProbDistributionsBeta, opencl_matches_cpu_small) {
                                                 beta);
   stan::math::test::compare_cpu_opencl_prim_rev(beta_lpdf_functor_propto, y,
                                                 alpha, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      beta_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      beta_lpdf_functor_propto, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());
 }
 
 TEST(ProbDistributionsBeta, opencl_broadcast_y) {
@@ -113,6 +119,10 @@ TEST(ProbDistributionsBeta, opencl_broadcast_y) {
                                                          alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       beta_lpdf_functor_propto, y, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      beta_lpdf_functor, y, alpha.transpose().eval(), beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      beta_lpdf_functor_propto, y, alpha, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsBeta, opencl_broadcast_alpha) {
@@ -128,6 +138,10 @@ TEST(ProbDistributionsBeta, opencl_broadcast_alpha) {
                                                          alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       beta_lpdf_functor_propto, y, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      beta_lpdf_functor, y.transpose().eval(), alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      beta_lpdf_functor_propto, y, alpha, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsBeta, opencl_broadcast_beta) {
@@ -145,6 +159,10 @@ TEST(ProbDistributionsBeta, opencl_broadcast_beta) {
                                                          alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       beta_lpdf_functor_propto, y, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      beta_lpdf_functor, y.transpose().eval(), alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      beta_lpdf_functor_propto, y, alpha.transpose().eval(), beta);
 }
 
 TEST(ProbDistributionsBeta, opencl_y_alpha_scalar) {
@@ -189,6 +207,12 @@ TEST(ProbDistributionsBeta, opencl_matches_cpu_big) {
                                                 beta);
   stan::math::test::compare_cpu_opencl_prim_rev(beta_lpdf_functor_propto, y,
                                                 alpha, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      beta_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      beta_lpdf_functor_propto, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/beta_proportion_lpdf_test.cpp b/test/unit/math/opencl/rev/beta_proportion_lpdf_test.cpp
index 6e637b5999b..6709ea03e42 100644
--- a/test/unit/math/opencl/rev/beta_proportion_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/beta_proportion_lpdf_test.cpp
@@ -74,10 +74,12 @@ TEST(ProbDistributionsBetaProportion, opencl_matches_cpu_small) {
   Eigen::VectorXd kappa(N);
   kappa << 0.3, 1.8, 3.0;
 
-  stan::math::test::compare_cpu_opencl_prim_rev(beta_proportion_lpdf_functor, y,
-                                                mu, kappa);
   stan::math::test::compare_cpu_opencl_prim_rev(
-      beta_proportion_lpdf_functor_propto, y, mu, kappa);
+      beta_proportion_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      kappa.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      beta_proportion_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), kappa.transpose().eval());
 }
 
 TEST(ProbDistributionsBetaProportion, opencl_broadcast_y) {
@@ -93,6 +95,11 @@ TEST(ProbDistributionsBetaProportion, opencl_broadcast_y) {
       beta_proportion_lpdf_functor, y_scal, mu, kappa);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       beta_proportion_lpdf_functor_propto, y_scal, mu, kappa);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      beta_proportion_lpdf_functor, y_scal, mu.transpose().eval(), kappa);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      beta_proportion_lpdf_functor_propto, y_scal, mu,
+      kappa.transpose().eval());
 }
 
 TEST(ProbDistributionsBetaProportion, opencl_broadcast_mu) {
@@ -108,6 +115,11 @@ TEST(ProbDistributionsBetaProportion, opencl_broadcast_mu) {
       beta_proportion_lpdf_functor, y, mu_scal, kappa);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       beta_proportion_lpdf_functor_propto, y, mu_scal, kappa);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      beta_proportion_lpdf_functor, y.transpose().eval(), mu_scal, kappa);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      beta_proportion_lpdf_functor_propto, y, mu_scal,
+      kappa.transpose().eval());
 }
 
 TEST(ProbDistributionsBetaProportion, opencl_broadcast_kappa) {
@@ -123,6 +135,11 @@ TEST(ProbDistributionsBetaProportion, opencl_broadcast_kappa) {
       beta_proportion_lpdf_functor, y, mu, kappa_scal);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       beta_proportion_lpdf_functor_propto, y, mu, kappa_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      beta_proportion_lpdf_functor, y.transpose().eval(), mu, kappa_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      beta_proportion_lpdf_functor_propto, y, mu.transpose().eval(),
+      kappa_scal);
 }
 
 TEST(ProbDistributionsBetaProportion, opencl_matches_cpu_big) {
@@ -139,6 +156,12 @@ TEST(ProbDistributionsBetaProportion, opencl_matches_cpu_big) {
                                                 mu, kappa);
   stan::math::test::compare_cpu_opencl_prim_rev(
       beta_proportion_lpdf_functor_propto, y, mu, kappa);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      beta_proportion_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      kappa.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      beta_proportion_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), kappa.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/binomial_lpmf_test.cpp b/test/unit/math/opencl/rev/binomial_lpmf_test.cpp
index 8b76c0c3436..7be5166eb86 100644
--- a/test/unit/math/opencl/rev/binomial_lpmf_test.cpp
+++ b/test/unit/math/opencl/rev/binomial_lpmf_test.cpp
@@ -76,6 +76,10 @@ TEST(ProbDistributionsBinomial, opencl_matches_cpu_small) {
                                                 theta);
   stan::math::test::compare_cpu_opencl_prim_rev(binomial_lpmf_functor_propto, n,
                                                 m, theta);
+  stan::math::test::compare_cpu_opencl_prim_rev(binomial_lpmf_functor, n, m,
+                                                theta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(binomial_lpmf_functor_propto, n,
+                                                m, theta.transpose().eval());
 }
 
 TEST(ProbDistributionsBinomial, opencl_broadcast_n) {
@@ -90,6 +94,10 @@ TEST(ProbDistributionsBinomial, opencl_broadcast_n) {
                                                          n, m, theta);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       binomial_lpmf_functor_propto, n, m, theta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      binomial_lpmf_functor, n, m, theta.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      binomial_lpmf_functor_propto, n, m, theta.transpose().eval());
 }
 
 TEST(ProbDistributionsBinomial, opencl_broadcast_N) {
@@ -104,6 +112,10 @@ TEST(ProbDistributionsBinomial, opencl_broadcast_N) {
                                                          n, m, theta);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       binomial_lpmf_functor_propto, n, m, theta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      binomial_lpmf_functor, n, m, theta.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      binomial_lpmf_functor_propto, n, m, theta.transpose().eval());
 }
 
 TEST(ProbDistributionsBinomial, opencl_broadcast_theta) {
@@ -136,6 +148,10 @@ TEST(ProbDistributionsBinomial, opencl_matches_cpu_big) {
                                                 theta);
   stan::math::test::compare_cpu_opencl_prim_rev(binomial_lpmf_functor_propto, n,
                                                 m, theta);
+  stan::math::test::compare_cpu_opencl_prim_rev(binomial_lpmf_functor, n, m,
+                                                theta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(binomial_lpmf_functor_propto, n,
+                                                m, theta.transpose().eval());
 }
 
 TEST(ProbDistributionsBinomial, opencl_n_N_scalar) {
diff --git a/test/unit/math/opencl/rev/cauchy_lpdf_test.cpp b/test/unit/math/opencl/rev/cauchy_lpdf_test.cpp
index f232cd9a5fd..7623819af84 100644
--- a/test/unit/math/opencl/rev/cauchy_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/cauchy_lpdf_test.cpp
@@ -80,6 +80,12 @@ TEST(ProbDistributionsCauchy, opencl_matches_cpu_small) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(cauchy_lpdf_functor_propto, y,
                                                 mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      cauchy_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      cauchy_lpdf_functor_propto, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsCauchy, opencl_broadcast_y) {
@@ -95,6 +101,10 @@ TEST(ProbDistributionsCauchy, opencl_broadcast_y) {
                                                          y_scal, mu, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       cauchy_lpdf_functor_propto, y_scal, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      cauchy_lpdf_functor, y_scal, mu.transpose().eval(), sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      cauchy_lpdf_functor_propto, y_scal, mu, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsCauchy, opencl_broadcast_mu) {
@@ -110,6 +120,10 @@ TEST(ProbDistributionsCauchy, opencl_broadcast_mu) {
                                                          mu_scal, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       cauchy_lpdf_functor_propto, y, mu_scal, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      cauchy_lpdf_functor, y.transpose().eval(), mu_scal, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      cauchy_lpdf_functor_propto, y, mu_scal, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsCauchy, opencl_broadcast_sigma) {
@@ -125,6 +139,10 @@ TEST(ProbDistributionsCauchy, opencl_broadcast_sigma) {
                                                          mu, sigma_scal);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       cauchy_lpdf_functor_propto, y, mu, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      cauchy_lpdf_functor, y.transpose().eval(), mu, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      cauchy_lpdf_functor_propto, y, mu.transpose().eval(), sigma_scal);
 }
 
 TEST(ProbDistributionsCauchy, opencl_matches_cpu_big) {
@@ -141,6 +159,12 @@ TEST(ProbDistributionsCauchy, opencl_matches_cpu_big) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(cauchy_lpdf_functor_propto, y,
                                                 mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      cauchy_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      cauchy_lpdf_functor_propto, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/chi_square_lpdf_test.cpp b/test/unit/math/opencl/rev/chi_square_lpdf_test.cpp
index e5af658f356..aaa48c05a5d 100644
--- a/test/unit/math/opencl/rev/chi_square_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/chi_square_lpdf_test.cpp
@@ -59,6 +59,10 @@ TEST(ProbDistributionsChiSquare, opencl_matches_cpu_small) {
   stan::math::test::compare_cpu_opencl_prim_rev(chi_square_lpdf_functor, y, nu);
   stan::math::test::compare_cpu_opencl_prim_rev(chi_square_lpdf_functor_propto,
                                                 y, nu);
+  stan::math::test::compare_cpu_opencl_prim_rev(chi_square_lpdf_functor,
+                                                y.transpose().eval(), nu);
+  stan::math::test::compare_cpu_opencl_prim_rev(chi_square_lpdf_functor_propto,
+                                                y, nu.transpose().eval());
 }
 
 TEST(ProbDistributionsChiSquare, opencl_broadcast_y) {
@@ -98,6 +102,11 @@ TEST(ProbDistributionsChiSquare, opencl_matches_cpu_big) {
   stan::math::test::compare_cpu_opencl_prim_rev(chi_square_lpdf_functor, y, nu);
   stan::math::test::compare_cpu_opencl_prim_rev(chi_square_lpdf_functor_propto,
                                                 y, nu);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      chi_square_lpdf_functor, y.transpose().eval(), nu.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(chi_square_lpdf_functor_propto,
+                                                y.transpose().eval(),
+                                                nu.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/double_exponential_lpdf_test.cpp b/test/unit/math/opencl/rev/double_exponential_lpdf_test.cpp
index 942cf3e989e..a4380a78ce2 100644
--- a/test/unit/math/opencl/rev/double_exponential_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/double_exponential_lpdf_test.cpp
@@ -72,10 +72,17 @@ TEST(ProbDistributionsDoubleExponential, opencl_matches_cpu_small) {
   mu << 0.3, 0.8, -1.7;
   Eigen::VectorXd sigma(N);
   sigma << 0.3, 0.8, 4.2;
+
   stan::math::test::compare_cpu_opencl_prim_rev(double_exponential_lpdf_functor,
                                                 y, mu, sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(
       double_exponential_lpdf_functor_propto, y, mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      double_exponential_lpdf_functor, y.transpose().eval(),
+      mu.transpose().eval(), sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      double_exponential_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsDoubleExponential, opencl_broadcast_y) {
@@ -91,6 +98,11 @@ TEST(ProbDistributionsDoubleExponential, opencl_broadcast_y) {
       double_exponential_lpdf_functor, y_scal, mu, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       double_exponential_lpdf_functor_propto, y_scal, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      double_exponential_lpdf_functor, y_scal, mu.transpose().eval(), sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      double_exponential_lpdf_functor_propto, y_scal, mu,
+      sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsDoubleExponential, opencl_broadcast_mu) {
@@ -106,6 +118,11 @@ TEST(ProbDistributionsDoubleExponential, opencl_broadcast_mu) {
       double_exponential_lpdf_functor, y, mu_scal, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       double_exponential_lpdf_functor_propto, y, mu_scal, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      double_exponential_lpdf_functor, y.transpose().eval(), mu_scal, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      double_exponential_lpdf_functor_propto, y, mu_scal,
+      sigma.transpose().eval());
 }
 TEST(ProbDistributionsDoubleExponential, opencl_broadcast_sigma) {
   int N = 3;
@@ -120,6 +137,11 @@ TEST(ProbDistributionsDoubleExponential, opencl_broadcast_sigma) {
       double_exponential_lpdf_functor, y, mu, sigma_scal);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       double_exponential_lpdf_functor_propto, y, mu, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      double_exponential_lpdf_functor, y.transpose().eval(), mu, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      double_exponential_lpdf_functor_propto, y, mu.transpose().eval(),
+      sigma_scal);
 }
 
 TEST(ProbDistributionsDoubleExponential, opencl_matches_cpu_big) {
@@ -136,6 +158,12 @@ TEST(ProbDistributionsDoubleExponential, opencl_matches_cpu_big) {
                                                 y, mu, sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(
       double_exponential_lpdf_functor_propto, y, mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      double_exponential_lpdf_functor, y.transpose().eval(),
+      mu.transpose().eval(), sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      double_exponential_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsDoubleExponential, opencl_y_mu_scalar) {
diff --git a/test/unit/math/opencl/rev/exp_mod_normal_lpdf_test.cpp b/test/unit/math/opencl/rev/exp_mod_normal_lpdf_test.cpp
index bb9aab873cf..e8c0e406fa4 100644
--- a/test/unit/math/opencl/rev/exp_mod_normal_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/exp_mod_normal_lpdf_test.cpp
@@ -119,6 +119,13 @@ TEST(ProbDistributionsExpModNormal, opencl_matches_cpu_small) {
                                                 mu, sigma, lambda);
   stan::math::test::compare_cpu_opencl_prim_rev(
       exp_mod_normal_lpdf_functor_propto, y, mu, sigma, lambda);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      exp_mod_normal_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval(), lambda.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      exp_mod_normal_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), sigma.transpose().eval(),
+      lambda.transpose().eval());
 }
 
 TEST(ProbDistributionsExpModNormal, opencl_broadcast_y) {
@@ -136,6 +143,12 @@ TEST(ProbDistributionsExpModNormal, opencl_broadcast_y) {
       exp_mod_normal_lpdf_functor, y, mu, sigma, lambda);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       exp_mod_normal_lpdf_functor_propto, y, mu, sigma, lambda);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      exp_mod_normal_lpdf_functor, y, mu.transpose().eval(),
+      sigma.transpose().eval(), lambda);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      exp_mod_normal_lpdf_functor_propto, y, mu, sigma.transpose().eval(),
+      lambda.transpose().eval());
 }
 
 TEST(ProbDistributionsExpModNormal, opencl_broadcast_mu) {
@@ -153,6 +166,12 @@ TEST(ProbDistributionsExpModNormal, opencl_broadcast_mu) {
       exp_mod_normal_lpdf_functor, y, mu, sigma, lambda);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       exp_mod_normal_lpdf_functor_propto, y, mu, sigma, lambda);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      exp_mod_normal_lpdf_functor, y, mu, sigma.transpose().eval(),
+      lambda.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      exp_mod_normal_lpdf_functor_propto, y.transpose().eval(), mu, sigma,
+      lambda.transpose().eval());
 }
 
 TEST(ProbDistributionsExpModNormal, opencl_broadcast_sigma) {
@@ -170,6 +189,12 @@ TEST(ProbDistributionsExpModNormal, opencl_broadcast_sigma) {
       exp_mod_normal_lpdf_functor, y, mu, sigma, lambda);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       exp_mod_normal_lpdf_functor_propto, y, mu, sigma, lambda);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      exp_mod_normal_lpdf_functor, y.transpose().eval(), mu, sigma,
+      lambda.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      exp_mod_normal_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), sigma, lambda);
 }
 
 TEST(ProbDistributionsExpModNormal, opencl_broadcast_lambda) {
@@ -187,6 +212,12 @@ TEST(ProbDistributionsExpModNormal, opencl_broadcast_lambda) {
       exp_mod_normal_lpdf_functor, y, mu, sigma, lambda);
   stan::math::test::test_opencl_broadcasting_prim_rev<3>(
       exp_mod_normal_lpdf_functor_propto, y, mu, sigma, lambda);
+  stan::math::test::test_opencl_broadcasting_prim_rev<3>(
+      exp_mod_normal_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma, lambda);
+  stan::math::test::test_opencl_broadcasting_prim_rev<3>(
+      exp_mod_normal_lpdf_functor_propto, y, mu.transpose().eval(),
+      sigma.transpose().eval(), lambda);
 }
 
 TEST(ProbDistributionsExpModNormal, opencl_matches_cpu_big) {
@@ -206,6 +237,13 @@ TEST(ProbDistributionsExpModNormal, opencl_matches_cpu_big) {
                                                 mu, sigma, lambda);
   stan::math::test::compare_cpu_opencl_prim_rev(
       exp_mod_normal_lpdf_functor_propto, y, mu, sigma, lambda);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      exp_mod_normal_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval(), lambda.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      exp_mod_normal_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), sigma.transpose().eval(),
+      lambda.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/exponential_lpdf_test.cpp b/test/unit/math/opencl/rev/exponential_lpdf_test.cpp
index d6d7942fd37..01e5671360a 100644
--- a/test/unit/math/opencl/rev/exponential_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/exponential_lpdf_test.cpp
@@ -68,6 +68,10 @@ TEST(ProbDistributionsExponential, opencl_matches_cpu_small) {
                                                 beta);
   stan::math::test::compare_cpu_opencl_prim_rev(exponential_lpdf_functor_propto,
                                                 y, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(exponential_lpdf_functor,
+                                                y.transpose().eval(), beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(exponential_lpdf_functor_propto,
+                                                y, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsExponential, opencl_broadcast_y) {
@@ -108,6 +112,11 @@ TEST(ProbDistributionsExponential, opencl_matches_cpu_big) {
                                                 beta);
   stan::math::test::compare_cpu_opencl_prim_rev(exponential_lpdf_functor_propto,
                                                 y, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      exponential_lpdf_functor, y.transpose().eval(), beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(exponential_lpdf_functor_propto,
+                                                y.transpose().eval(),
+                                                beta.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/frechet_lpdf_test.cpp b/test/unit/math/opencl/rev/frechet_lpdf_test.cpp
index 75afe49b895..01a119a12e8 100644
--- a/test/unit/math/opencl/rev/frechet_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/frechet_lpdf_test.cpp
@@ -97,6 +97,12 @@ TEST(ProbDistributionsFrechet, opencl_matches_cpu_small) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(frechet_lpdf_functor_propto, y,
                                                 alpha, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      frechet_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      frechet_lpdf_functor_propto, y.transpose().eval(),
+      alpha.transpose().eval(), sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsFrechet, opencl_broadcast_y) {
@@ -112,6 +118,10 @@ TEST(ProbDistributionsFrechet, opencl_broadcast_y) {
                                                          y, alpha, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       frechet_lpdf_functor_propto, y, alpha, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      frechet_lpdf_functor, y, alpha.transpose().eval(), sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      frechet_lpdf_functor_propto, y, alpha, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsFrechet, opencl_broadcast_alpha) {
@@ -127,6 +137,10 @@ TEST(ProbDistributionsFrechet, opencl_broadcast_alpha) {
                                                          y, alpha, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       frechet_lpdf_functor_propto, y, alpha, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      frechet_lpdf_functor, y.transpose().eval(), alpha, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      frechet_lpdf_functor_propto, y, alpha, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsFrechet, opencl_broadcast_sigma) {
@@ -144,6 +158,10 @@ TEST(ProbDistributionsFrechet, opencl_broadcast_sigma) {
                                                          y, alpha, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       frechet_lpdf_functor_propto, y, alpha, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      frechet_lpdf_functor, y.transpose().eval(), alpha, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      frechet_lpdf_functor_propto, y, alpha.transpose().eval(), sigma);
 }
 
 TEST(ProbDistributionsFrechet, opencl_matches_cpu_big) {
@@ -160,6 +178,12 @@ TEST(ProbDistributionsFrechet, opencl_matches_cpu_big) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(frechet_lpdf_functor_propto, y,
                                                 alpha, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      frechet_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      frechet_lpdf_functor_propto, y.transpose().eval(),
+      alpha.transpose().eval(), sigma.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/gamma_lpdf_test.cpp b/test/unit/math/opencl/rev/gamma_lpdf_test.cpp
index bd5ab71c35f..bb3c2ddc29f 100644
--- a/test/unit/math/opencl/rev/gamma_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/gamma_lpdf_test.cpp
@@ -91,6 +91,12 @@ TEST(ProbDistributionsGamma, opencl_matches_cpu_small) {
                                                 beta);
   stan::math::test::compare_cpu_opencl_prim_rev(gamma_lpdf_functor_propto, y,
                                                 alpha, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      gamma_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      gamma_lpdf_functor_propto, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());
 }
 TEST(ProbDistributionsGamma, opencl_matches_cpu_small_negative_y) {
   int N = 3;
@@ -121,6 +127,10 @@ TEST(ProbDistributionsGamma, opencl_broadcast_y) {
                                                          alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       gamma_lpdf_functor_propto, y, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      gamma_lpdf_functor, y, alpha.transpose().eval(), beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      gamma_lpdf_functor_propto, y, alpha, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsGamma, opencl_broadcast_alpha) {
@@ -136,6 +146,10 @@ TEST(ProbDistributionsGamma, opencl_broadcast_alpha) {
                                                          alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       gamma_lpdf_functor_propto, y, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      gamma_lpdf_functor, y.transpose().eval(), alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      gamma_lpdf_functor_propto, y, alpha, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsGamma, opencl_broadcast_beta) {
@@ -151,6 +165,10 @@ TEST(ProbDistributionsGamma, opencl_broadcast_beta) {
                                                          alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       gamma_lpdf_functor_propto, y, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      gamma_lpdf_functor, y.transpose().eval(), alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      gamma_lpdf_functor_propto, y, alpha.transpose().eval(), beta);
 }
 
 TEST(ProbDistributionsGamma, opencl_matches_cpu_big) {
@@ -167,6 +185,12 @@ TEST(ProbDistributionsGamma, opencl_matches_cpu_big) {
                                                 beta);
   stan::math::test::compare_cpu_opencl_prim_rev(gamma_lpdf_functor_propto, y,
                                                 alpha, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      gamma_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      gamma_lpdf_functor_propto, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/gumbel_lpdf_test.cpp b/test/unit/math/opencl/rev/gumbel_lpdf_test.cpp
index 213c2ab6f28..a4c67778f52 100644
--- a/test/unit/math/opencl/rev/gumbel_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/gumbel_lpdf_test.cpp
@@ -90,6 +90,12 @@ TEST(ProbDistributionsGumbel, opencl_matches_cpu_small) {
                                                 beta);
   stan::math::test::compare_cpu_opencl_prim_rev(gumbel_lpdf_functor_propto, y,
                                                 mu, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      gumbel_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      gumbel_lpdf_functor_propto, y.transpose().eval(), mu.transpose().eval(),
+      beta.transpose().eval());
 }
 
 TEST(ProbDistributionsGumbel, opencl_broadcast_y) {
@@ -105,6 +111,10 @@ TEST(ProbDistributionsGumbel, opencl_broadcast_y) {
                                                          mu, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       gumbel_lpdf_functor_propto, y, mu, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      gumbel_lpdf_functor, y, mu.transpose().eval(), beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      gumbel_lpdf_functor_propto, y, mu, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsGumbel, opencl_broadcast_mu) {
@@ -120,6 +130,10 @@ TEST(ProbDistributionsGumbel, opencl_broadcast_mu) {
                                                          mu, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       gumbel_lpdf_functor_propto, y, mu, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      gumbel_lpdf_functor, y.transpose().eval(), mu, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      gumbel_lpdf_functor_propto, y, mu, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsGumbel, opencl_broadcast_beta) {
@@ -135,6 +149,10 @@ TEST(ProbDistributionsGumbel, opencl_broadcast_beta) {
                                                          mu, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       gumbel_lpdf_functor_propto, y, mu, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      gumbel_lpdf_functor, y.transpose().eval(), mu, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      gumbel_lpdf_functor_propto, y, mu.transpose().eval(), beta);
 }
 
 TEST(ProbDistributionsGumbel, opencl_matches_cpu_big) {
@@ -151,6 +169,12 @@ TEST(ProbDistributionsGumbel, opencl_matches_cpu_big) {
                                                 beta);
   stan::math::test::compare_cpu_opencl_prim_rev(gumbel_lpdf_functor_propto, y,
                                                 mu, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      gumbel_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      gumbel_lpdf_functor_propto, y.transpose().eval(), mu.transpose().eval(),
+      beta.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/inv_chi_square_lpdf_test.cpp b/test/unit/math/opencl/rev/inv_chi_square_lpdf_test.cpp
index d6c055815e9..e0ff451ed13 100644
--- a/test/unit/math/opencl/rev/inv_chi_square_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/inv_chi_square_lpdf_test.cpp
@@ -66,6 +66,10 @@ TEST(ProbDistributionsInvChiSquare, opencl_matches_cpu_small) {
                                                 nu);
   stan::math::test::compare_cpu_opencl_prim_rev(
       inv_chi_square_lpdf_functor_propto, y, nu);
+  stan::math::test::compare_cpu_opencl_prim_rev(inv_chi_square_lpdf_functor,
+                                                y.transpose().eval(), nu);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      inv_chi_square_lpdf_functor_propto, y, nu.transpose().eval());
 }
 
 TEST(ProbDistributionsInvChiSquare, opencl_matches_cpu_small_y_zero) {
@@ -120,6 +124,11 @@ TEST(ProbDistributionsInvChiSquare, opencl_matches_cpu_big) {
                                                 nu);
   stan::math::test::compare_cpu_opencl_prim_rev(
       inv_chi_square_lpdf_functor_propto, y, nu);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      inv_chi_square_lpdf_functor, y.transpose().eval(), nu.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      inv_chi_square_lpdf_functor_propto, y.transpose().eval(),
+      nu.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/inv_gamma_lpdf_test.cpp b/test/unit/math/opencl/rev/inv_gamma_lpdf_test.cpp
index 9cc0d765a0f..d86600bd966 100644
--- a/test/unit/math/opencl/rev/inv_gamma_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/inv_gamma_lpdf_test.cpp
@@ -92,6 +92,12 @@ TEST(ProbDistributionsInvGamma, opencl_matches_cpu_small) {
                                                 alpha, beta);
   stan::math::test::compare_cpu_opencl_prim_rev(inv_gamma_lpdf_functor_propto,
                                                 y, alpha, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      inv_gamma_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      inv_gamma_lpdf_functor_propto, y.transpose().eval(),
+      alpha.transpose().eval(), beta.transpose().eval());
 }
 
 TEST(ProbDistributionsInvGamma, opencl_matches_cpu_small_zero_y) {
@@ -124,6 +130,10 @@ TEST(ProbDistributionsInvGamma, opencl_broadcast_y) {
                                                          y, alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       inv_gamma_lpdf_functor_propto, y, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      inv_gamma_lpdf_functor, y, alpha.transpose().eval(), beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      inv_gamma_lpdf_functor_propto, y, alpha, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsInvGamma, opencl_broadcast_alpha) {
@@ -139,6 +149,10 @@ TEST(ProbDistributionsInvGamma, opencl_broadcast_alpha) {
                                                          y, alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       inv_gamma_lpdf_functor_propto, y, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      inv_gamma_lpdf_functor, y.transpose().eval(), alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      inv_gamma_lpdf_functor_propto, y, alpha, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsInvGamma, opencl_broadcast_beta) {
@@ -154,6 +168,10 @@ TEST(ProbDistributionsInvGamma, opencl_broadcast_beta) {
                                                          y, alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       inv_gamma_lpdf_functor_propto, y, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      inv_gamma_lpdf_functor, y.transpose().eval(), alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      inv_gamma_lpdf_functor_propto, y, alpha.transpose().eval(), beta);
 }
 
 TEST(ProbDistributionsInvGamma, opencl_matches_cpu_big) {
@@ -170,6 +188,12 @@ TEST(ProbDistributionsInvGamma, opencl_matches_cpu_big) {
                                                 alpha, beta);
   stan::math::test::compare_cpu_opencl_prim_rev(inv_gamma_lpdf_functor_propto,
                                                 y, alpha, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      inv_gamma_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      inv_gamma_lpdf_functor_propto, y.transpose().eval(),
+      alpha.transpose().eval(), beta.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/logistic_lpdf_test.cpp b/test/unit/math/opencl/rev/logistic_lpdf_test.cpp
index 508b9a94960..44cc923c6ea 100644
--- a/test/unit/math/opencl/rev/logistic_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/logistic_lpdf_test.cpp
@@ -92,6 +92,12 @@ TEST(ProbDistributionsLogistic, opencl_matches_cpu_small) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(logistic_lpdf_functor_propto, y,
                                                 mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      logistic_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      logistic_lpdf_functor_propto, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsLogistic, opencl_broadcast_y) {
@@ -107,6 +113,10 @@ TEST(ProbDistributionsLogistic, opencl_broadcast_y) {
                                                          y, mu, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       logistic_lpdf_functor_propto, y, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      logistic_lpdf_functor, y, mu.transpose().eval(), sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      logistic_lpdf_functor_propto, y, mu, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsLogistic, opencl_broadcast_mu) {
@@ -122,6 +132,10 @@ TEST(ProbDistributionsLogistic, opencl_broadcast_mu) {
                                                          y, mu, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       logistic_lpdf_functor_propto, y, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      logistic_lpdf_functor, y.transpose().eval(), mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      logistic_lpdf_functor_propto, y, mu, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsLogistic, opencl_broadcast_sigma) {
@@ -137,6 +151,10 @@ TEST(ProbDistributionsLogistic, opencl_broadcast_sigma) {
                                                          y, mu, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       logistic_lpdf_functor_propto, y, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      logistic_lpdf_functor, y.transpose().eval(), mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      logistic_lpdf_functor_propto, y, mu.transpose().eval(), sigma);
 }
 
 TEST(ProbDistributionsLogistic, opencl_matches_cpu_big) {
@@ -153,6 +171,12 @@ TEST(ProbDistributionsLogistic, opencl_matches_cpu_big) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(logistic_lpdf_functor_propto, y,
                                                 mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      logistic_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      logistic_lpdf_functor_propto, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsLogistic, opencl_sigma_mu_scalar) {
diff --git a/test/unit/math/opencl/rev/lognormal_lpdf_test.cpp b/test/unit/math/opencl/rev/lognormal_lpdf_test.cpp
index 668325755bd..b3c0bdbd490 100644
--- a/test/unit/math/opencl/rev/lognormal_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/lognormal_lpdf_test.cpp
@@ -98,6 +98,12 @@ TEST(ProbDistributionsLognormal, opencl_matches_cpu_small) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(lognormal_lpdf_functor_propto,
                                                 y, mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      lognormal_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      lognormal_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), sigma.transpose().eval());
 }
 TEST(ProbDistributionsLognormal, opencl_matches_cpu_small_zero_y) {
   int N = 3;
@@ -129,6 +135,10 @@ TEST(ProbDistributionsLognormal, opencl_broadcast_y) {
                                                          y, mu, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       lognormal_lpdf_functor_propto, y, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      lognormal_lpdf_functor, y, mu.transpose().eval(), sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      lognormal_lpdf_functor_propto, y, mu, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsLognormal, opencl_broadcast_mu) {
@@ -144,6 +154,10 @@ TEST(ProbDistributionsLognormal, opencl_broadcast_mu) {
                                                          y, mu, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       lognormal_lpdf_functor_propto, y, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      lognormal_lpdf_functor, y.transpose().eval(), mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      lognormal_lpdf_functor_propto, y, mu, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsLognormal, opencl_broadcast_sigma) {
@@ -159,6 +173,10 @@ TEST(ProbDistributionsLognormal, opencl_broadcast_sigma) {
                                                          y, mu, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       lognormal_lpdf_functor_propto, y, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      lognormal_lpdf_functor, y.transpose().eval(), mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      lognormal_lpdf_functor_propto, y, mu.transpose().eval(), sigma);
 }
 
 TEST(ProbDistributionsLognormal, opencl_matches_cpu_big) {
@@ -175,6 +193,12 @@ TEST(ProbDistributionsLognormal, opencl_matches_cpu_big) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(lognormal_lpdf_functor_propto,
                                                 y, mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      lognormal_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      lognormal_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), sigma.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/neg_binomial_2_log_lpmf_test.cpp b/test/unit/math/opencl/rev/neg_binomial_2_log_lpmf_test.cpp
index 253eda0b25f..d9e2e89b852 100644
--- a/test/unit/math/opencl/rev/neg_binomial_2_log_lpmf_test.cpp
+++ b/test/unit/math/opencl/rev/neg_binomial_2_log_lpmf_test.cpp
@@ -81,6 +81,12 @@ TEST(ProbDistributionsNegBinomial2Log, opencl_matches_cpu_small) {
                                                 n, eta, phi);
   stan::math::test::compare_cpu_opencl_prim_rev(
       neg_binomial_2_log_lpmf_functor_propto, n, eta, phi);
+  stan::math::test::compare_cpu_opencl_prim_rev(neg_binomial_2_log_lpmf_functor,
+                                                n, eta.transpose().eval(),
+                                                phi.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      neg_binomial_2_log_lpmf_functor_propto, n, eta.transpose().eval(),
+      phi.transpose().eval());
 }
 
 TEST(ProbDistributionsNegBinomial2Log, opencl_broadcast_n) {
@@ -96,6 +102,10 @@ TEST(ProbDistributionsNegBinomial2Log, opencl_broadcast_n) {
       neg_binomial_2_log_lpmf_functor, n, eta, phi);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       neg_binomial_2_log_lpmf_functor_propto, n, eta, phi);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      neg_binomial_2_log_lpmf_functor, n, eta.transpose().eval(), phi);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      neg_binomial_2_log_lpmf_functor_propto, n, eta, phi.transpose().eval());
 }
 
 TEST(ProbDistributionsNegBinomial2Log, opencl_broadcast_eta) {
@@ -110,6 +120,10 @@ TEST(ProbDistributionsNegBinomial2Log, opencl_broadcast_eta) {
       neg_binomial_2_log_lpmf_functor, n, eta, phi);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       neg_binomial_2_log_lpmf_functor_propto, n, eta, phi);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      neg_binomial_2_log_lpmf_functor, n, eta, phi.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      neg_binomial_2_log_lpmf_functor_propto, n, eta, phi.transpose().eval());
 }
 
 TEST(ProbDistributionsNegBinomial2Log, opencl_broadcast_phi) {
@@ -124,6 +138,10 @@ TEST(ProbDistributionsNegBinomial2Log, opencl_broadcast_phi) {
       neg_binomial_2_log_lpmf_functor, n, eta, phi);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       neg_binomial_2_log_lpmf_functor_propto, n, eta, phi);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      neg_binomial_2_log_lpmf_functor, n, eta.transpose().eval(), phi);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      neg_binomial_2_log_lpmf_functor_propto, n, eta.transpose().eval(), phi);
 }
 
 TEST(ProbDistributionsNegBinomial2Log, opencl_matches_cpu_big) {
@@ -142,6 +160,12 @@ TEST(ProbDistributionsNegBinomial2Log, opencl_matches_cpu_big) {
                                                 n, eta, phi);
   stan::math::test::compare_cpu_opencl_prim_rev(
       neg_binomial_2_log_lpmf_functor_propto, n, eta, phi);
+  stan::math::test::compare_cpu_opencl_prim_rev(neg_binomial_2_log_lpmf_functor,
+                                                n, eta.transpose().eval(),
+                                                phi.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      neg_binomial_2_log_lpmf_functor_propto, n, eta.transpose().eval(),
+      phi.transpose().eval());
 }
 
 TEST(ProbDistributionsNegBinomial2Log, opencl_matches_cpu_eta_phi_scalar) {
diff --git a/test/unit/math/opencl/rev/neg_binomial_2_lpmf_test.cpp b/test/unit/math/opencl/rev/neg_binomial_2_lpmf_test.cpp
index a08beecaede..e7e863610d9 100644
--- a/test/unit/math/opencl/rev/neg_binomial_2_lpmf_test.cpp
+++ b/test/unit/math/opencl/rev/neg_binomial_2_lpmf_test.cpp
@@ -86,6 +86,12 @@ TEST(muProbDistributionsNegBinomial2, opencl_matches_cpu_small) {
                                                 mu, phi);
   stan::math::test::compare_cpu_opencl_prim_rev(
       neg_binomial_2_lpmf_functor_propto, n, mu, phi);
+  stan::math::test::compare_cpu_opencl_prim_rev(neg_binomial_2_lpmf_functor, n,
+                                                mu.transpose().eval(),
+                                                phi.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      neg_binomial_2_lpmf_functor_propto, n, mu.transpose().eval(),
+      phi.transpose().eval());
 }
 
 TEST(muProbDistributionsNegBinomial2, opencl_broadcast_n) {
@@ -101,6 +107,10 @@ TEST(muProbDistributionsNegBinomial2, opencl_broadcast_n) {
       neg_binomial_2_lpmf_functor, n, mu, phi);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       neg_binomial_2_lpmf_functor_propto, n, mu, phi);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      neg_binomial_2_lpmf_functor, n, mu.transpose().eval(), phi);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      neg_binomial_2_lpmf_functor_propto, n, mu, phi.transpose().eval());
 }
 
 TEST(muProbDistributionsNegBinomial2, opencl_broadcast_mu) {
@@ -115,6 +125,10 @@ TEST(muProbDistributionsNegBinomial2, opencl_broadcast_mu) {
       neg_binomial_2_lpmf_functor, n, mu, phi);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       neg_binomial_2_lpmf_functor_propto, n, mu, phi);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      neg_binomial_2_lpmf_functor, n, mu, phi.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      neg_binomial_2_lpmf_functor_propto, n, mu, phi.transpose().eval());
 }
 
 TEST(muProbDistributionsNegBinomial2, opencl_broadcast_phi) {
@@ -129,6 +143,10 @@ TEST(muProbDistributionsNegBinomial2, opencl_broadcast_phi) {
       neg_binomial_2_lpmf_functor, n, mu, phi);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       neg_binomial_2_lpmf_functor_propto, n, mu, phi);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      neg_binomial_2_lpmf_functor, n, mu.transpose().eval(), phi);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      neg_binomial_2_lpmf_functor_propto, n, mu.transpose().eval(), phi);
 }
 
 TEST(muProbDistributionsNegBinomial2, opencl_matches_cpu_big) {
@@ -147,6 +165,12 @@ TEST(muProbDistributionsNegBinomial2, opencl_matches_cpu_big) {
                                                 mu, phi);
   stan::math::test::compare_cpu_opencl_prim_rev(
       neg_binomial_2_lpmf_functor_propto, n, mu, phi);
+  stan::math::test::compare_cpu_opencl_prim_rev(neg_binomial_2_lpmf_functor, n,
+                                                mu.transpose().eval(),
+                                                phi.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      neg_binomial_2_lpmf_functor_propto, n, mu.transpose().eval(),
+      phi.transpose().eval());
 }
 
 TEST(ProbDistributionsNegBinomial2, opencl_scalar_n_mu) {
diff --git a/test/unit/math/opencl/rev/neg_binomial_lpmf_test.cpp b/test/unit/math/opencl/rev/neg_binomial_lpmf_test.cpp
index 4255bc6d4be..28014935e8e 100644
--- a/test/unit/math/opencl/rev/neg_binomial_lpmf_test.cpp
+++ b/test/unit/math/opencl/rev/neg_binomial_lpmf_test.cpp
@@ -86,6 +86,12 @@ TEST(muProbDistributionsNegBinomial, opencl_matches_cpu_small) {
                                                 alpha, beta);
   stan::math::test::compare_cpu_opencl_prim_rev(
       neg_binomial_lpmf_functor_propto, n, alpha, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(neg_binomial_lpmf_functor, n,
+                                                alpha.transpose().eval(),
+                                                beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      neg_binomial_lpmf_functor_propto, n, alpha.transpose().eval(),
+      beta.transpose().eval());
 }
 
 TEST(muProbDistributionsNegBinomial, opencl_broadcast_n) {
@@ -101,6 +107,10 @@ TEST(muProbDistributionsNegBinomial, opencl_broadcast_n) {
       neg_binomial_lpmf_functor, n, alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       neg_binomial_lpmf_functor_propto, n, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      neg_binomial_lpmf_functor, n, alpha.transpose().eval(), beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      neg_binomial_lpmf_functor_propto, n, alpha, beta.transpose().eval());
 }
 
 TEST(muProbDistributionsNegBinomial, opencl_broadcast_alpha) {
@@ -115,6 +125,10 @@ TEST(muProbDistributionsNegBinomial, opencl_broadcast_alpha) {
       neg_binomial_lpmf_functor, n, alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       neg_binomial_lpmf_functor_propto, n, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      neg_binomial_lpmf_functor, n, alpha, beta.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      neg_binomial_lpmf_functor_propto, n, alpha, beta.transpose().eval());
 }
 
 TEST(muProbDistributionsNegBinomial, opencl_broadcast_beta) {
@@ -129,6 +143,10 @@ TEST(muProbDistributionsNegBinomial, opencl_broadcast_beta) {
       neg_binomial_lpmf_functor, n, alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       neg_binomial_lpmf_functor_propto, n, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      neg_binomial_lpmf_functor, n, alpha.transpose().eval(), beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      neg_binomial_lpmf_functor_propto, n, alpha.transpose().eval(), beta);
 }
 
 TEST(muProbDistributionsNegBinomial, opencl_matches_cpu_big) {
@@ -147,6 +165,12 @@ TEST(muProbDistributionsNegBinomial, opencl_matches_cpu_big) {
                                                 alpha, beta);
   stan::math::test::compare_cpu_opencl_prim_rev(
       neg_binomial_lpmf_functor_propto, n, alpha, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(neg_binomial_lpmf_functor, n,
+                                                alpha.transpose().eval(),
+                                                beta.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      neg_binomial_lpmf_functor_propto, n, alpha.transpose().eval(),
+      beta.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/normal_lpdf_test.cpp b/test/unit/math/opencl/rev/normal_lpdf_test.cpp
index a33c49fa37a..0a4d2e98f05 100644
--- a/test/unit/math/opencl/rev/normal_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/normal_lpdf_test.cpp
@@ -80,6 +80,12 @@ TEST(ProbDistributionsNormal, opencl_matches_cpu_small) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(normal_lpdf_functor_propto, y,
                                                 mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      normal_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      normal_lpdf_functor_propto, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsNormal, opencl_broadcast_y) {
@@ -95,6 +101,10 @@ TEST(ProbDistributionsNormal, opencl_broadcast_y) {
                                                          y_scal, mu, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       normal_lpdf_functor_propto, y_scal, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      normal_lpdf_functor, y_scal, mu.transpose().eval(), sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      normal_lpdf_functor_propto, y_scal, mu, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsNormal, opencl_broadcast_mu) {
@@ -110,6 +120,10 @@ TEST(ProbDistributionsNormal, opencl_broadcast_mu) {
                                                          mu_scal, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       normal_lpdf_functor_propto, y, mu_scal, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      normal_lpdf_functor, y.transpose().eval(), mu_scal, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      normal_lpdf_functor_propto, y, mu_scal, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsNormal, opencl_broadcast_sigma) {
@@ -125,6 +139,10 @@ TEST(ProbDistributionsNormal, opencl_broadcast_sigma) {
                                                          mu, sigma_scal);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       normal_lpdf_functor_propto, y, mu, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      normal_lpdf_functor, y.transpose().eval(), mu, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      normal_lpdf_functor_propto, y, mu.transpose().eval(), sigma_scal);
 }
 
 TEST(ProbDistributionsNormal, opencl_matches_cpu_big) {
@@ -141,6 +159,12 @@ TEST(ProbDistributionsNormal, opencl_matches_cpu_big) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(normal_lpdf_functor_propto, y,
                                                 mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      normal_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      normal_lpdf_functor_propto, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/pareto_lpdf_test.cpp b/test/unit/math/opencl/rev/pareto_lpdf_test.cpp
index 5f90a590abf..1ab39a71093 100644
--- a/test/unit/math/opencl/rev/pareto_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/pareto_lpdf_test.cpp
@@ -90,6 +90,12 @@ TEST(ProbDistributionsPareto, opencl_matches_cpu_small) {
                                                 alpha);
   stan::math::test::compare_cpu_opencl_prim_rev(pareto_lpdf_functor_propto, y,
                                                 y_min, alpha);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      pareto_lpdf_functor, y.transpose().eval(), y_min.transpose().eval(),
+      alpha.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      pareto_lpdf_functor_propto, y.transpose().eval(),
+      y_min.transpose().eval(), alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsPareto, opencl_broadcast_y) {
@@ -105,6 +111,11 @@ TEST(ProbDistributionsPareto, opencl_broadcast_y) {
                                                          y_scal, y_min, alpha);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       pareto_lpdf_functor_propto, y_scal, y_min, alpha);
+
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      pareto_lpdf_functor, y_scal, y_min.transpose().eval(), alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      pareto_lpdf_functor_propto, y_scal, y_min, alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsPareto, opencl_broadcast_y_min) {
@@ -120,6 +131,10 @@ TEST(ProbDistributionsPareto, opencl_broadcast_y_min) {
                                                          y_min_scal, alpha);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       pareto_lpdf_functor_propto, y, y_min_scal, alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      pareto_lpdf_functor, y.transpose().eval(), y_min_scal, alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      pareto_lpdf_functor_propto, y, y_min_scal, alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsPareto, opencl_broadcast_alpha) {
@@ -135,6 +150,10 @@ TEST(ProbDistributionsPareto, opencl_broadcast_alpha) {
                                                          y_min, alpha_scal);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       pareto_lpdf_functor_propto, y, y_min, alpha_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      pareto_lpdf_functor, y.transpose().eval(), y_min, alpha_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      pareto_lpdf_functor_propto, y, y_min.transpose().eval(), alpha_scal);
 }
 
 TEST(ProbDistributionsPareto, opencl_matches_cpu_big) {
@@ -151,6 +170,12 @@ TEST(ProbDistributionsPareto, opencl_matches_cpu_big) {
                                                 alpha);
   stan::math::test::compare_cpu_opencl_prim_rev(pareto_lpdf_functor_propto, y,
                                                 y_min, alpha);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      pareto_lpdf_functor, y.transpose().eval(), y_min.transpose().eval(),
+      alpha.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      pareto_lpdf_functor_propto, y.transpose().eval(),
+      y_min.transpose().eval(), alpha.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/pareto_type_2_lpdf_test.cpp b/test/unit/math/opencl/rev/pareto_type_2_lpdf_test.cpp
index b926d039a70..af5dce76eeb 100644
--- a/test/unit/math/opencl/rev/pareto_type_2_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/pareto_type_2_lpdf_test.cpp
@@ -119,6 +119,13 @@ TEST(ProbDistributionsParetoType2, opencl_matches_cpu_small) {
                                                 mu, lambda, alpha);
   stan::math::test::compare_cpu_opencl_prim_rev(
       pareto_type_2_lpdf_functor_propto, y, mu, lambda, alpha);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      pareto_type_2_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      lambda.transpose().eval(), alpha.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      pareto_type_2_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), lambda.transpose().eval(),
+      alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsParetoType2, opencl_broadcast_y) {
@@ -136,6 +143,12 @@ TEST(ProbDistributionsParetoType2, opencl_broadcast_y) {
       pareto_type_2_lpdf_functor, y, mu, lambda, alpha);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       pareto_type_2_lpdf_functor_propto, y, mu, lambda, alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      pareto_type_2_lpdf_functor, y, mu.transpose().eval(),
+      lambda.transpose().eval(), alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      pareto_type_2_lpdf_functor_propto, y, mu, lambda.transpose().eval(),
+      alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsParetoType2, opencl_broadcast_mu) {
@@ -153,6 +166,12 @@ TEST(ProbDistributionsParetoType2, opencl_broadcast_mu) {
       pareto_type_2_lpdf_functor, y, mu, lambda, alpha);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       pareto_type_2_lpdf_functor_propto, y, mu, lambda, alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      pareto_type_2_lpdf_functor, y, mu, lambda.transpose().eval(),
+      alpha.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      pareto_type_2_lpdf_functor_propto, y.transpose().eval(), mu, lambda,
+      alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsParetoType2, opencl_broadcast_lambda) {
@@ -170,6 +189,12 @@ TEST(ProbDistributionsParetoType2, opencl_broadcast_lambda) {
       pareto_type_2_lpdf_functor, y, mu, lambda, alpha);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       pareto_type_2_lpdf_functor_propto, y, mu, lambda, alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      pareto_type_2_lpdf_functor, y.transpose().eval(), mu, lambda,
+      alpha.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      pareto_type_2_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), lambda, alpha);
 }
 
 TEST(ProbDistributionsParetoType2, opencl_broadcast_alpha) {
@@ -187,6 +212,12 @@ TEST(ProbDistributionsParetoType2, opencl_broadcast_alpha) {
       pareto_type_2_lpdf_functor, y, mu, lambda, alpha);
   stan::math::test::test_opencl_broadcasting_prim_rev<3>(
       pareto_type_2_lpdf_functor_propto, y, mu, lambda, alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<3>(
+      pareto_type_2_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      lambda, alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<3>(
+      pareto_type_2_lpdf_functor_propto, y, mu.transpose().eval(),
+      lambda.transpose().eval(), alpha);
 }
 
 TEST(ProbDistributionsParetoType2, opencl_matches_cpu_big) {
@@ -206,6 +237,13 @@ TEST(ProbDistributionsParetoType2, opencl_matches_cpu_big) {
                                                 mu, lambda, alpha);
   stan::math::test::compare_cpu_opencl_prim_rev(
       pareto_type_2_lpdf_functor_propto, y, mu, lambda, alpha);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      pareto_type_2_lpdf_functor, y, mu.transpose().eval(),
+      lambda.transpose().eval(), alpha.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      pareto_type_2_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), lambda.transpose().eval(),
+      alpha.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/poisson_log_lpmf_test.cpp b/test/unit/math/opencl/rev/poisson_log_lpmf_test.cpp
index 03a8d53d120..bc7dcfc3171 100644
--- a/test/unit/math/opencl/rev/poisson_log_lpmf_test.cpp
+++ b/test/unit/math/opencl/rev/poisson_log_lpmf_test.cpp
@@ -57,6 +57,10 @@ TEST(ProbDistributionsPoissonLog, opencl_matches_cpu_small) {
                                                 alpha);
   stan::math::test::compare_cpu_opencl_prim_rev(poisson_log_lpmf_functor_propto,
                                                 n, alpha);
+  stan::math::test::compare_cpu_opencl_prim_rev(poisson_log_lpmf_functor, n,
+                                                alpha.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(poisson_log_lpmf_functor_propto,
+                                                n, alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsPoissonLog, opencl_broadcast_n) {
@@ -98,6 +102,10 @@ TEST(ProbDistributionsPoissonLog, opencl_matches_cpu_big) {
                                                 alpha);
   stan::math::test::compare_cpu_opencl_prim_rev(poisson_log_lpmf_functor_propto,
                                                 n, alpha);
+  stan::math::test::compare_cpu_opencl_prim_rev(poisson_log_lpmf_functor, n,
+                                                alpha.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(poisson_log_lpmf_functor_propto,
+                                                n, alpha.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/poisson_lpmf_test.cpp b/test/unit/math/opencl/rev/poisson_lpmf_test.cpp
index 27b87ba3dad..0da588c6ab7 100644
--- a/test/unit/math/opencl/rev/poisson_lpmf_test.cpp
+++ b/test/unit/math/opencl/rev/poisson_lpmf_test.cpp
@@ -55,7 +55,12 @@ TEST(ProbDistributionsPoisson, opencl_matches_cpu_small) {
 
   stan::math::test::compare_cpu_opencl_prim_rev(poisson_lpmf_functor, n, alpha);
   stan::math::test::compare_cpu_opencl_prim_rev(poisson_lpmf_functor_propto, n,
                                                 alpha);
+  // Same two checks again with alpha transposed.
+  stan::math::test::compare_cpu_opencl_prim_rev(poisson_lpmf_functor, n,
+                                                alpha.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(poisson_lpmf_functor_propto, n,
+                                                alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsPoisson, opencl_broadcast_n) {
@@ -96,6 +101,10 @@ TEST(ProbDistributionsPoisson, opencl_matches_cpu_big) {
   stan::math::test::compare_cpu_opencl_prim_rev(poisson_lpmf_functor, n, alpha);
   stan::math::test::compare_cpu_opencl_prim_rev(poisson_lpmf_functor_propto, n,
                                                 alpha);
+  stan::math::test::compare_cpu_opencl_prim_rev(poisson_lpmf_functor, n,
+                                                alpha.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(poisson_lpmf_functor_propto, n,
+                                                alpha.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/rayleigh_lpdf_test.cpp b/test/unit/math/opencl/rev/rayleigh_lpdf_test.cpp
index 3eb73f34281..93460840c2f 100644
--- a/test/unit/math/opencl/rev/rayleigh_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/rayleigh_lpdf_test.cpp
@@ -63,6 +63,10 @@ TEST(ProbDistributionsRayleigh, opencl_matches_cpu_small) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(rayleigh_lpdf_functor_propto, y,
                                                 sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(rayleigh_lpdf_functor,
+                                                y.transpose().eval(), sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(rayleigh_lpdf_functor_propto, y,
+                                                sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsRayleigh, opencl_broadcast_y) {
@@ -103,6 +107,11 @@ TEST(ProbDistributionsRayleigh, opencl_matches_cpu_big) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(rayleigh_lpdf_functor_propto, y,
                                                 sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      rayleigh_lpdf_functor, y.transpose().eval(), sigma.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(rayleigh_lpdf_functor_propto,
+                                                y.transpose().eval(),
+                                                sigma.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/scaled_inv_chi_square_lpdf_test.cpp b/test/unit/math/opencl/rev/scaled_inv_chi_square_lpdf_test.cpp
index 04b41fd3390..cf698c694a5 100644
--- a/test/unit/math/opencl/rev/scaled_inv_chi_square_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/scaled_inv_chi_square_lpdf_test.cpp
@@ -90,6 +90,12 @@ TEST(ProbDistributionsScaledInvChiSquare, opencl_matches_cpu_small) {
       scaled_inv_chi_square_lpdf_functor_propto, y, nu, s);
   stan::math::test::compare_cpu_opencl_prim_rev(
       scaled_inv_chi_square_lpdf_functor, y, nu, s);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      scaled_inv_chi_square_lpdf_functor_propto, y.transpose().eval(),
+      nu.transpose().eval(), s.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      scaled_inv_chi_square_lpdf_functor, y.transpose().eval(),
+      nu.transpose().eval(), s.transpose().eval());
 }
 
 TEST(ProbDistributionsScaledInvChiSquare, opencl_matches_cpu_small_y_negative) {
@@ -122,6 +128,11 @@ TEST(ProbDistributionsScaledInvChiSquare, opencl_broadcast_y) {
       scaled_inv_chi_square_lpdf_functor, y_scal, nu, s);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       scaled_inv_chi_square_lpdf_functor_propto, y_scal, nu, s);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      scaled_inv_chi_square_lpdf_functor, y_scal, nu.transpose().eval(), s);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      scaled_inv_chi_square_lpdf_functor_propto, y_scal, nu,
+      s.transpose().eval());
 }
 
 TEST(ProbDistributionsScaledInvChiSquare, opencl_broadcast_nu) {
@@ -137,6 +148,11 @@ TEST(ProbDistributionsScaledInvChiSquare, opencl_broadcast_nu) {
       scaled_inv_chi_square_lpdf_functor, y, nu_scal, s);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       scaled_inv_chi_square_lpdf_functor_propto, y, nu_scal, s);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      scaled_inv_chi_square_lpdf_functor, y.transpose().eval(), nu_scal, s);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      scaled_inv_chi_square_lpdf_functor_propto, y, nu_scal,
+      s.transpose().eval());
 }
 
 TEST(ProbDistributionsScaledInvChiSquare, opencl_broadcast_s) {
@@ -152,6 +168,11 @@ TEST(ProbDistributionsScaledInvChiSquare, opencl_broadcast_s) {
       scaled_inv_chi_square_lpdf_functor, y, nu, s_scal);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       scaled_inv_chi_square_lpdf_functor_propto, y, nu, s_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      scaled_inv_chi_square_lpdf_functor, y.transpose().eval(), nu, s_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      scaled_inv_chi_square_lpdf_functor_propto, y, nu.transpose().eval(),
+      s_scal);
 }
 
 TEST(ProbDistributionsScaledInvChiSquare, opencl_matches_cpu_big) {
@@ -168,6 +189,12 @@ TEST(ProbDistributionsScaledInvChiSquare, opencl_matches_cpu_big) {
       scaled_inv_chi_square_lpdf_functor, y, nu, s);
   stan::math::test::compare_cpu_opencl_prim_rev(
       scaled_inv_chi_square_lpdf_functor_propto, y, nu, s);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      scaled_inv_chi_square_lpdf_functor, y.transpose().eval(),
+      nu.transpose().eval(), s.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      scaled_inv_chi_square_lpdf_functor_propto, y.transpose().eval(),
+      nu.transpose().eval(), s.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/skew_normal_lpdf_test.cpp b/test/unit/math/opencl/rev/skew_normal_lpdf_test.cpp
index 5e64b72d603..4e60315ee71 100644
--- a/test/unit/math/opencl/rev/skew_normal_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/skew_normal_lpdf_test.cpp
@@ -105,6 +105,13 @@ TEST(ProbDistributionsSkewNormal, opencl_matches_cpu_small) {
                                                 sigma, alpha);
   stan::math::test::compare_cpu_opencl_prim_rev(skew_normal_lpdf_functor_propto,
                                                 y, mu, sigma, alpha);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      skew_normal_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval(), alpha.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      skew_normal_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), sigma.transpose().eval(),
+      alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsSkewNormal, opencl_broadcast_y) {
@@ -122,6 +129,12 @@ TEST(ProbDistributionsSkewNormal, opencl_broadcast_y) {
       skew_normal_lpdf_functor, y_scal, mu, sigma, alpha);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       skew_normal_lpdf_functor_propto, y_scal, mu, sigma, alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      skew_normal_lpdf_functor, y_scal, mu.transpose().eval(),
+      sigma.transpose().eval(), alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      skew_normal_lpdf_functor_propto, y_scal, mu, sigma.transpose().eval(),
+      alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsSkewNormal, opencl_broadcast_mu) {
@@ -139,6 +152,12 @@ TEST(ProbDistributionsSkewNormal, opencl_broadcast_mu) {
       skew_normal_lpdf_functor, y, mu_scal, sigma, alpha);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       skew_normal_lpdf_functor_propto, y, mu_scal, sigma, alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      skew_normal_lpdf_functor, y, mu_scal, sigma.transpose().eval(),
+      alpha.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      skew_normal_lpdf_functor_propto, y.transpose().eval(), mu_scal, sigma,
+      alpha.transpose().eval());
 }
 
 TEST(ProbDistributionsSkewNormal, opencl_broadcast_sigma) {
@@ -156,6 +175,12 @@ TEST(ProbDistributionsSkewNormal, opencl_broadcast_sigma) {
       skew_normal_lpdf_functor, y, mu, sigma_scal, alpha);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       skew_normal_lpdf_functor_propto, y, mu, sigma_scal, alpha);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      skew_normal_lpdf_functor, y.transpose().eval(), mu, sigma_scal,
+      alpha.transpose().eval());
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      skew_normal_lpdf_functor_propto, y.transpose().eval(),
+      mu.transpose().eval(), sigma_scal, alpha);
 }
 
 TEST(ProbDistributionsSkewNormal, opencl_broadcast_alpha) {
@@ -173,6 +198,12 @@ TEST(ProbDistributionsSkewNormal, opencl_broadcast_alpha) {
       skew_normal_lpdf_functor, y, mu, sigma, alpha_scal);
   stan::math::test::test_opencl_broadcasting_prim_rev<3>(
       skew_normal_lpdf_functor_propto, y, mu, sigma, alpha_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<3>(
+      skew_normal_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma, alpha_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<3>(
+      skew_normal_lpdf_functor_propto, y, mu.transpose().eval(),
+      sigma.transpose().eval(), alpha_scal);
 }
 
 TEST(ProbDistributionsSkewNormal, opencl_matches_cpu_big) {
@@ -191,6 +222,12 @@ TEST(ProbDistributionsSkewNormal, opencl_matches_cpu_big) {
                                                 sigma, alpha);
   stan::math::test::compare_cpu_opencl_prim_rev(skew_normal_lpdf_functor_propto,
                                                 y, mu, sigma, alpha);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      skew_normal_lpdf_functor, y.transpose().eval(), mu.transpose().eval(),
+      sigma.transpose().eval(), alpha);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      skew_normal_lpdf_functor_propto, y, mu.transpose().eval(),
+      sigma.transpose().eval(), alpha.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/std_normal_lpdf_test.cpp b/test/unit/math/opencl/rev/std_normal_lpdf_test.cpp
index f4bc0176d4f..7f8fcea56bd 100644
--- a/test/unit/math/opencl/rev/std_normal_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/std_normal_lpdf_test.cpp
@@ -35,6 +35,10 @@ TEST(ProbDistributionsStdNormal, opencl_matches_cpu_small) {
   stan::math::test::compare_cpu_opencl_prim_rev(std_normal_lpdf_functor, y);
   stan::math::test::compare_cpu_opencl_prim_rev(std_normal_lpdf_functor_propto,
                                                 y);
+  stan::math::test::compare_cpu_opencl_prim_rev(std_normal_lpdf_functor,
+                                                y.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(std_normal_lpdf_functor_propto,
+                                                y.transpose().eval());
 }
 
 TEST(ProbDistributionsStdNormal, opencl_matches_cpu_big) {
@@ -46,6 +50,10 @@ TEST(ProbDistributionsStdNormal, opencl_matches_cpu_big) {
   stan::math::test::compare_cpu_opencl_prim_rev(std_normal_lpdf_functor, y);
   stan::math::test::compare_cpu_opencl_prim_rev(std_normal_lpdf_functor_propto,
                                                 y);
+  stan::math::test::compare_cpu_opencl_prim_rev(std_normal_lpdf_functor,
+                                                y.transpose().eval());
+  stan::math::test::compare_cpu_opencl_prim_rev(std_normal_lpdf_functor_propto,
+                                                y.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/student_t_lpdf2_test.cpp b/test/unit/math/opencl/rev/student_t_lpdf2_test.cpp
new file mode 100644
index 00000000000..84ca4cb42af
--- /dev/null
+++ b/test/unit/math/opencl/rev/student_t_lpdf2_test.cpp
@@ -0,0 +1,86 @@
+#ifdef STAN_OPENCL
+#include <stan/math/opencl/rev.hpp>
+#include <stan/math.hpp>
+#include <gtest/gtest.h>
+#include <test/unit/math/opencl/util.hpp>
+#include <vector>
+
+auto student_t_lpdf_functor  // generic lambda forwarding to student_t_lpdf
+    = [](const auto& y, const auto& nu, const auto& mu, const auto& sigma) {
+        return stan::math::student_t_lpdf(y, nu, mu, sigma);
+      };
+auto student_t_lpdf_functor_propto  // variant that calls student_t_lpdf<true>
+    = [](const auto& y, const auto& nu, const auto& mu, const auto& sigma) {
+        return stan::math::student_t_lpdf<true>(y, nu, mu, sigma);
+      };
+
+TEST(ProbDistributionsStudentT, opencl_broadcast_nu) {  // scalar nu vs vectors
+  int N = 3;  // length of each vector argument
+
+  Eigen::VectorXd y(N);
+  y << 0.3, -0.8, 1.0;
+  double nu_scal = 12.3;  // scalar; <1> below presumably marks this slot
+  Eigen::VectorXd mu(N);
+  mu << 0.3, 0.8, -1.0;
+  Eigen::VectorXd sigma(N);
+  sigma << 0.3, 0.8, 1.0;
+
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(student_t_lpdf_functor,
+                                                         y, nu_scal, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      student_t_lpdf_functor_propto, y, nu_scal, mu, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      student_t_lpdf_functor, y, nu_scal, mu.transpose().eval(),
+      sigma.transpose().eval());  // column y with row-vector mu and sigma
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
+      student_t_lpdf_functor_propto, y.transpose().eval(), nu_scal, mu,
+      sigma.transpose().eval());  // row-vector y and sigma with column mu
+}
+
+TEST(ProbDistributionsStudentT, opencl_broadcast_mu) {  // scalar mu vs vectors
+  int N = 3;  // length of each vector argument
+
+  Eigen::VectorXd y(N);
+  y << 0.3, -0.8, 1.0;
+  Eigen::VectorXd nu(N);
+  nu << 0.3, 0.3, 1.5;
+  double mu_scal = 12.3;  // scalar; <2> below presumably marks this slot
+  Eigen::VectorXd sigma(N);
+  sigma << 0.3, 0.8, 1.0;
+
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(student_t_lpdf_functor,
+                                                         y, nu, mu_scal, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      student_t_lpdf_functor_propto, y, nu, mu_scal, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      student_t_lpdf_functor, y.transpose().eval(), nu, mu_scal,
+      sigma.transpose().eval());  // row-vector y and sigma with column nu
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
+      student_t_lpdf_functor_propto, y.transpose().eval(),
+      nu.transpose().eval(), mu_scal, sigma);  // row y and nu, column sigma
+}
+
+TEST(ProbDistributionsStudentT, opencl_broadcast_sigma) {  // scalar sigma
+  int N = 3;  // length of each vector argument
+
+  Eigen::VectorXd y(N);
+  y << 0.3, -0.8, 1.0;
+  Eigen::VectorXd nu(N);
+  nu << 0.3, 0.3, 1.5;
+  Eigen::VectorXd mu(N);
+  mu << 0.3, 0.8, -1.0;
+  double sigma_scal = 12.3;  // scalar; <3> below presumably marks this slot
+
+  stan::math::test::test_opencl_broadcasting_prim_rev<3>(student_t_lpdf_functor,
+                                                         y, nu, mu, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<3>(
+      student_t_lpdf_functor_propto, y, nu, mu, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<3>(
+      student_t_lpdf_functor, y.transpose().eval(), nu.transpose().eval(), mu,
+      sigma_scal);  // row-vector y and nu with column mu
+  stan::math::test::test_opencl_broadcasting_prim_rev<3>(
+      student_t_lpdf_functor_propto, y, nu.transpose().eval(),
+      mu.transpose().eval(), sigma_scal);  // column y, row nu and mu
+}
+
+#endif
diff --git a/test/unit/math/opencl/rev/student_t_lpdf_test.cpp b/test/unit/math/opencl/rev/student_t_lpdf_test.cpp
index 3a2eb447316..b6b56727a50 100644
--- a/test/unit/math/opencl/rev/student_t_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/student_t_lpdf_test.cpp
@@ -106,6 +106,12 @@ TEST(ProbDistributionsStudentT, opencl_matches_cpu_small) {
                                                 mu, sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(student_t_lpdf_functor_propto,
                                                 y, nu, mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      student_t_lpdf_functor, y.transpose().eval(), nu.transpose().eval(),
+      mu.transpose().eval(), sigma.transpose().eval());  // all four transposed
+  stan::math::test::compare_cpu_opencl_prim_rev(  // same inputs, propto functor
+      student_t_lpdf_functor_propto, y.transpose().eval(),
+      nu.transpose().eval(), mu.transpose().eval(), sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsStudentT, opencl_broadcast_y) {
@@ -123,57 +129,12 @@ TEST(ProbDistributionsStudentT, opencl_broadcast_y) {
                                                          y_scal, nu, mu, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       student_t_lpdf_functor_propto, y_scal, nu, mu, sigma);
-}
-
-TEST(ProbDistributionsStudentT, opencl_broadcast_nu) {
-  int N = 3;
-
-  Eigen::VectorXd y(N);
-  y << 0.3, -0.8, 1.0;
-  double nu_scal = 12.3;
-  Eigen::VectorXd mu(N);
-  mu << 0.3, 0.8, -1.0;
-  Eigen::VectorXd sigma(N);
-  sigma << 0.3, 0.8, 1.0;
-
-  stan::math::test::test_opencl_broadcasting_prim_rev<1>(student_t_lpdf_functor,
-                                                         y, nu_scal, mu, sigma);
-  stan::math::test::test_opencl_broadcasting_prim_rev<1>(
-      student_t_lpdf_functor_propto, y, nu_scal, mu, sigma);
-}
-
-TEST(ProbDistributionsStudentT, opencl_broadcast_mu) {
-  int N = 3;
-
-  Eigen::VectorXd y(N);
-  y << 0.3, -0.8, 1.0;
-  Eigen::VectorXd nu(N);
-  nu << 0.3, 0.3, 1.5;
-  double mu_scal = 12.3;
-  Eigen::VectorXd sigma(N);
-  sigma << 0.3, 0.8, 1.0;
-
-  stan::math::test::test_opencl_broadcasting_prim_rev<2>(student_t_lpdf_functor,
-                                                         y, nu, mu_scal, sigma);
-  stan::math::test::test_opencl_broadcasting_prim_rev<2>(
-      student_t_lpdf_functor_propto, y, nu, mu_scal, sigma);
-}
-
-TEST(ProbDistributionsStudentT, opencl_broadcast_sigma) {
-  int N = 3;
-
-  Eigen::VectorXd y(N);
-  y << 0.3, -0.8, 1.0;
-  Eigen::VectorXd nu(N);
-  nu << 0.3, 0.3, 1.5;
-  Eigen::VectorXd mu(N);
-  mu << 0.3, 0.8, -1.0;
-  double sigma_scal = 12.3;
-
-  stan::math::test::test_opencl_broadcasting_prim_rev<3>(student_t_lpdf_functor,
-                                                         y, nu, mu, sigma_scal);
-  stan::math::test::test_opencl_broadcasting_prim_rev<3>(
-      student_t_lpdf_functor_propto, y, nu, mu, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      student_t_lpdf_functor, y_scal, nu.transpose().eval(),
+      mu.transpose().eval(), sigma);  // nu and mu transposed, sigma as is
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(
+      student_t_lpdf_functor_propto, y_scal, nu, mu.transpose().eval(),
+      sigma.transpose().eval());  // mu and sigma transposed, nu as is
 }
 
 TEST(ProbDistributionsStudentT, opencl_matches_cpu_big) {
@@ -192,6 +153,12 @@ TEST(ProbDistributionsStudentT, opencl_matches_cpu_big) {
                                                 mu, sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(student_t_lpdf_functor_propto,
                                                 y, nu, mu, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      student_t_lpdf_functor, y.transpose().eval(), nu.transpose().eval(),
+      mu.transpose().eval(), sigma);  // y, nu, mu transposed; sigma as is
+  stan::math::test::compare_cpu_opencl_prim_rev(  // all four transposed
+      student_t_lpdf_functor_propto, y.transpose().eval(),
+      nu.transpose().eval(), mu.transpose().eval(), sigma.transpose().eval());
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/uniform_lpdf_test.cpp b/test/unit/math/opencl/rev/uniform_lpdf_test.cpp
index 0990b60bc09..8f58bf84d32 100644
--- a/test/unit/math/opencl/rev/uniform_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/uniform_lpdf_test.cpp
@@ -84,6 +84,12 @@ TEST(ProbDistributionsUniform, opencl_matches_cpu_small) {
                                                 beta);
   stan::math::test::compare_cpu_opencl_prim_rev(uniform_lpdf_functor_propto, y,
                                                 alpha, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      uniform_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());  // y, alpha and beta all transposed
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      uniform_lpdf_functor_propto, y.transpose().eval(),
+      alpha.transpose().eval(), beta.transpose().eval());  // same for propto
 }
 
 TEST(ProbDistributionsUniform, opencl_matches_cpu_small_y_out_of_bounds) {
@@ -116,6 +122,10 @@ TEST(ProbDistributionsUniform, opencl_broadcast_y) {
                                                          y_scal, alpha, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       uniform_lpdf_functor_propto, y_scal, alpha, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(  // alpha transposed
+      uniform_lpdf_functor, y_scal, alpha.transpose().eval(), beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(  // beta transposed
+      uniform_lpdf_functor_propto, y_scal, alpha, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsUniform, opencl_broadcast_alpha) {
@@ -131,6 +141,10 @@ TEST(ProbDistributionsUniform, opencl_broadcast_alpha) {
                                                          y, alpha_scal, beta);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       uniform_lpdf_functor_propto, y, alpha_scal, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(  // y transposed
+      uniform_lpdf_functor, y.transpose().eval(), alpha_scal, beta);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(  // beta transposed
+      uniform_lpdf_functor_propto, y, alpha_scal, beta.transpose().eval());
 }
 
 TEST(ProbDistributionsUniform, opencl_broadcast_beta) {
@@ -146,6 +160,10 @@ TEST(ProbDistributionsUniform, opencl_broadcast_beta) {
                                                          y, alpha, beta_scal);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       uniform_lpdf_functor_propto, y, alpha, beta_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(  // y transposed
+      uniform_lpdf_functor, y.transpose().eval(), alpha, beta_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(  // alpha transposed
+      uniform_lpdf_functor_propto, y, alpha.transpose().eval(), beta_scal);
 }
 
 TEST(ProbDistributionsUniform, opencl_matches_cpu_big) {
@@ -165,6 +183,12 @@ TEST(ProbDistributionsUniform, opencl_matches_cpu_big) {
                                                 beta);
   stan::math::test::compare_cpu_opencl_prim_rev(uniform_lpdf_functor_propto, y,
                                                 alpha, beta);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      uniform_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      beta.transpose().eval());  // y, alpha and beta all transposed
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      uniform_lpdf_functor_propto, y.transpose().eval(),
+      alpha.transpose().eval(), beta.transpose().eval());  // same for propto
 }
 
 #endif
diff --git a/test/unit/math/opencl/rev/weibull_lpdf_test.cpp b/test/unit/math/opencl/rev/weibull_lpdf_test.cpp
index 28942846af8..c6c4e13ab69 100644
--- a/test/unit/math/opencl/rev/weibull_lpdf_test.cpp
+++ b/test/unit/math/opencl/rev/weibull_lpdf_test.cpp
@@ -90,6 +90,12 @@ TEST(ProbDistributionsWeibull, opencl_matches_cpu_small) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(weibull_lpdf_functor_propto, y,
                                                 alpha, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      weibull_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      sigma.transpose().eval());  // y, alpha and sigma all transposed
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      weibull_lpdf_functor_propto, y.transpose().eval(),
+      alpha.transpose().eval(), sigma.transpose().eval());  // same for propto
 }
 
 TEST(ProbDistributionsWeibull, opencl_broadcast_y) {
@@ -105,6 +111,10 @@ TEST(ProbDistributionsWeibull, opencl_broadcast_y) {
                                                          y_scal, alpha, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<0>(
       weibull_lpdf_functor_propto, y_scal, alpha, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(  // alpha transposed
+      weibull_lpdf_functor, y_scal, alpha.transpose().eval(), sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<0>(  // sigma transposed
+      weibull_lpdf_functor_propto, y_scal, alpha, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsWeibull, opencl_broadcast_alpha) {
@@ -120,6 +130,10 @@ TEST(ProbDistributionsWeibull, opencl_broadcast_alpha) {
                                                          y, alpha_scal, sigma);
   stan::math::test::test_opencl_broadcasting_prim_rev<1>(
       weibull_lpdf_functor_propto, y, alpha_scal, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(  // y transposed
+      weibull_lpdf_functor, y.transpose().eval(), alpha_scal, sigma);
+  stan::math::test::test_opencl_broadcasting_prim_rev<1>(  // sigma transposed
+      weibull_lpdf_functor_propto, y, alpha_scal, sigma.transpose().eval());
 }
 
 TEST(ProbDistributionsWeibull, opencl_broadcast_sigma) {
@@ -135,6 +149,10 @@ TEST(ProbDistributionsWeibull, opencl_broadcast_sigma) {
                                                          y, alpha, sigma_scal);
   stan::math::test::test_opencl_broadcasting_prim_rev<2>(
       weibull_lpdf_functor_propto, y, alpha, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(  // y transposed
+      weibull_lpdf_functor, y.transpose().eval(), alpha, sigma_scal);
+  stan::math::test::test_opencl_broadcasting_prim_rev<2>(  // alpha transposed
+      weibull_lpdf_functor_propto, y, alpha.transpose().eval(), sigma_scal);
 }
 
 TEST(ProbDistributionsWeibull, opencl_matches_cpu_big) {
@@ -151,6 +169,12 @@ TEST(ProbDistributionsWeibull, opencl_matches_cpu_big) {
                                                 sigma);
   stan::math::test::compare_cpu_opencl_prim_rev(weibull_lpdf_functor_propto, y,
                                                 alpha, sigma);
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      weibull_lpdf_functor, y.transpose().eval(), alpha.transpose().eval(),
+      sigma.transpose().eval());  // y, alpha and sigma all transposed
+  stan::math::test::compare_cpu_opencl_prim_rev(
+      weibull_lpdf_functor_propto, y.transpose().eval(),
+      alpha.transpose().eval(), sigma.transpose().eval());  // same for propto
 }
 
 #endif
diff --git a/test/unit/math/opencl/util.hpp b/test/unit/math/opencl/util.hpp
index 358f8209496..e474548609d 100644
--- a/test/unit/math/opencl/util.hpp
+++ b/test/unit/math/opencl/util.hpp
@@ -199,15 +199,26 @@ void test_opencl_broadcasting_prim_rev_impl(const Functor& functor,
                                                 const auto& args_vector) {
         auto res_scalar
             = eval(functor(opencl_argument(std::get<Is>(args_broadcast))...));
-        auto res_vec = eval(functor(opencl_argument(
-            to_vector_if<Is == I>(std::get<Is>(args_vector), N))...));
         std::string signature = type_name<decltype(args_broadcast)>().data();
-        expect_eq(res_vec, res_scalar,
-                  ("return values of broadcast and vector arguments do not "
-                   "match for signature "
-                   + signature + "!")
-                      .c_str());
-        var(recursive_sum(res_scalar) + recursive_sum(res_vec)).grad();
+
+        try {  // outer try: any throw below is reported with the signature
+          auto res_vec = eval(functor(opencl_argument(
+              to_vector_if<Is == I>(std::get<Is>(args_vector), N))...));
+          expect_eq(res_vec, res_scalar,
+                    ("return values of broadcast and vector arguments do not "
+                     "match for signature "
+                     + signature + "!")
+                        .c_str());
+          try {  // inner try: isolates a throw coming from the grad() call
+            var(recursive_sum(res_scalar) + recursive_sum(res_vec)).grad();
+          } catch (...) {
+            std::cerr << "throw in rev pass!" << std::endl;
+            throw;  // rethrow so the outer handler also logs the signature
+          }
+        } catch (...) {
+          std::cerr << "throw in signature: " << signature << "!" << std::endl;
+          throw;  // rethrow the original exception after logging
+        }
 
         static_cast<void>(std::initializer_list<int>{
             (expect_adj_near(
diff --git a/test/unit/math/rev/fun/cholesky_decompose_test.cpp b/test/unit/math/rev/fun/cholesky_decompose_test.cpp
index 714c4bb8a98..7885b6bc69f 100644
--- a/test/unit/math/rev/fun/cholesky_decompose_test.cpp
+++ b/test/unit/math/rev/fun/cholesky_decompose_test.cpp
@@ -449,6 +449,34 @@ TEST(AgradRevMatrix, mat_cholesky_1st_deriv_large_gradients) {
   test_simple_vec_mult(45, 1e-08);
 }
 
+TEST(AgradRevMatrix, cholesky_replicated_input) {
+  using stan::math::var;
+
+  auto f = [](int size, const auto& y) {  // sum of chol(diag(rep_vector(y)))
+    auto m = stan::math::diag_matrix(stan::math::rep_vector(y, size));
+    auto L = stan::math::cholesky_decompose(m);
+    return stan::math::sum(L);
+  };
+
+  double ydbl = 1.5;
+  double dx = 1e-5;  // step of the central finite difference below
+  var y = ydbl;
+  int size = 4;
+  var s = f(size, y);
+  s.grad();
+
+  double fd_ref = (f(size, ydbl + dx) - f(size, ydbl - dx)) / (2.0 * dx);
+  EXPECT_FLOAT_EQ(y.adj(), fd_ref);  // autodiff adjoint vs finite difference
+
+  stan::math::set_zero_all_adjoints();  // reset before the second gradient
+  size = 40;  // repeat the same check with a larger matrix
+  s = f(size, y);
+  s.grad();
+
+  fd_ref = (f(size, ydbl + dx) - f(size, ydbl - dx)) / (2.0 * dx);
+  EXPECT_FLOAT_EQ(y.adj(), fd_ref);
+}
+
 #ifdef STAN_OPENCL
 TEST(AgradRevMatrix, mat_cholesky_1st_deriv_large_gradients_opencl) {
   stan::math::opencl_context.tuning_opts().cholesky_size_worth_transfer = 25;