diff --git a/benchmarks/linear_programming/utils/get_datasets.py b/benchmarks/linear_programming/utils/get_datasets.py index 01ed86dc9..39e79ff32 100644 --- a/benchmarks/linear_programming/utils/get_datasets.py +++ b/benchmarks/linear_programming/utils/get_datasets.py @@ -693,7 +693,7 @@ def extract(file, dir, type): raise Exception(f"Unknown file extension found for extraction {file}") # download emps and compile # Disable emps for now - if type == "netlib": + if type == "netlib" and False: url = MittelmannInstances["emps"] file = os.path.join(dir, "emps.c") download(url, file) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ab18b6fab..5d4868c82 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -208,6 +208,27 @@ create_logger_macros(CUOPT "cuopt::default_logger()" include/cuopt) find_package(CUDSS REQUIRED) +# Find Protocol Buffers for remote solve support +find_package(Protobuf REQUIRED) +include_directories(${Protobuf_INCLUDE_DIRS}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +# Generate C++ code from .proto file +set(PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/linear_programming/utilities/cuopt_remote.proto") +set(PROTO_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.cc") +set(PROTO_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.h") + +add_custom_command( + OUTPUT "${PROTO_SRCS}" "${PROTO_HDRS}" + COMMAND ${Protobuf_PROTOC_EXECUTABLE} + ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} + --proto_path ${CMAKE_CURRENT_SOURCE_DIR}/src/linear_programming/utilities + ${PROTO_FILE} + DEPENDS ${PROTO_FILE} + COMMENT "Generating C++ code from cuopt_remote.proto" + VERBATIM +) + if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() @@ -219,6 +240,7 @@ if (HOST_LINEINFO) endif() add_library(cuopt SHARED ${CUOPT_SRC_FILES} + ${PROTO_SRCS} ) set_target_properties(cuopt @@ -317,6 +339,7 @@ target_link_libraries(cuopt raft::raft cuopt::mps_parser ${CUDSS_LIB_FILE} + protobuf::libprotobuf PRIVATE ${CUOPT_PRIVATE_CUDA_LIBS} ) @@ -449,6 +472,95 @@ install(TARGETS cuopt_cli COMPONENT runtime RUNTIME DESTINATION ${_BIN_DEST} ) + +# Remote solve server executable (synchronous) +add_executable(cuopt_remote_server cuopt_remote_server.cpp) +target_compile_options(cuopt_remote_server + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" +) + +target_include_directories(cuopt_remote_server + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + PUBLIC + "$" + "$" + ${CUDSS_INCLUDE} + "$" +) + +target_link_libraries(cuopt_remote_server + PUBLIC + cuopt + OpenMP::OpenMP_CXX + ${CUDSS_LIBRARIES} + PRIVATE +) +set_property(TARGET cuopt_remote_server PROPERTY INSTALL_RPATH "$ORIGIN/../${lib_dir}") + +# Install the remote server +install(TARGETS cuopt_remote_server + COMPONENT runtime + RUNTIME DESTINATION ${_BIN_DEST} +) + +# Async remote solve server executable +add_executable(cuopt_async_server cuopt_async_server.cpp ${PROTO_SRCS}) +target_compile_options(cuopt_async_server + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" +) + +target_include_directories(cuopt_async_server + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + PUBLIC + "$" + "$" + ${CUDSS_INCLUDE} + "$" +) + +target_link_libraries(cuopt_async_server + PUBLIC + OpenMP::OpenMP_CXX + protobuf::libprotobuf + rt + PRIVATE +) +set_property(TARGET cuopt_async_server PROPERTY INSTALL_RPATH "$ORIGIN/../${lib_dir}") + +# Solver worker process +add_executable(cuopt_solver_worker cuopt_solver_worker.cpp ${PROTO_SRCS}) +target_compile_options(cuopt_solver_worker + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" +) + +target_include_directories(cuopt_solver_worker + PRIVATE + 
"${CMAKE_CURRENT_SOURCE_DIR}/src" + PUBLIC + "$" + "$" + ${CUDSS_INCLUDE} + "$" +) + +target_link_libraries(cuopt_solver_worker + PUBLIC + cuopt + OpenMP::OpenMP_CXX + ${CUDSS_LIBRARIES} + protobuf::libprotobuf + rt + PRIVATE +) +set_property(TARGET cuopt_solver_worker PROPERTY INSTALL_RPATH "$ORIGIN/../${lib_dir}") + +# Install async server and worker +install(TARGETS cuopt_async_server cuopt_solver_worker + COMPONENT runtime + RUNTIME DESTINATION ${_BIN_DEST} +) endif() diff --git a/cpp/cuopt_async_server.cpp b/cpp/cuopt_async_server.cpp new file mode 100644 index 000000000..889fc9f67 --- /dev/null +++ b/cpp/cuopt_async_server.cpp @@ -0,0 +1,774 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file cuopt_async_server.cpp + * @brief Async remote solve server with job queue and worker processes + * + * This server: + * - Accepts async requests (submit, check status, get result, delete) + * - Uses shared memory queues for job distribution + * - Spawns solver worker processes + * - Tracks job status and stores results + * - Threaded result retrieval + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Shared memory structures (must match worker) +struct JobQueueEntry { + char job_id[64]; + uint32_t problem_type; + uint32_t data_size; + uint8_t data[1024 * 1024]; // 1MB + bool ready; + bool processed; +}; + +struct ResultQueueEntry { + char job_id[64]; + uint32_t status; + uint32_t data_size; + uint8_t data[2 * 1024 * 1024]; // 2MB + bool ready; + bool retrieved; +}; + +const size_t MAX_JOBS = 100; +const size_t MAX_RESULTS = 100; + +// Job tracking +struct JobInfo { + std::string job_id; + cuopt::remote::JobStatus status; + std::chrono::steady_clock::time_point submit_time; + std::vector result_data; // Stored result + uint32_t result_status; // 0 = success, 1 = error + bool is_blocking; // True if a thread is waiting synchronously +}; + +// Per-job condition variable for synchronous waiting +struct JobWaiter { + std::mutex mutex; + std::condition_variable cv; + std::vector result_data; + uint32_t result_status; + bool ready; + + JobWaiter() : ready(false), result_status(0) {} +}; + +// Global state +volatile sig_atomic_t keep_running = 1; +std::map job_tracker; +std::mutex tracker_mutex; + +std::map> waiting_threads; +std::mutex waiters_mutex; + +JobQueueEntry* job_queue = nullptr; +ResultQueueEntry* result_queue = nullptr; +pid_t worker_pid = -1; + +void signal_handler(int signal) +{ + if (signal == SIGINT || signal == SIGTERM) { + std::cout << "\n[Server] Received shutdown signal\n"; + keep_running = 0; + } +} + +// Generate unique job ID +std::string generate_job_id() +{ + static std::random_device rd; + static std::mt19937 gen(rd()); + static 
std::uniform_int_distribution dis; + + uint64_t id = dis(gen); + char buf[32]; + snprintf(buf, sizeof(buf), "job_%016lx", id); + return std::string(buf); +} + +// Socket helpers +static void write_all(int sockfd, const void* data, size_t size) +{ + const uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t written = ::write(sockfd, ptr, remaining); + if (written <= 0) throw std::runtime_error("Socket write failed"); + ptr += written; + remaining -= written; + } +} + +static void read_all(int sockfd, void* data, size_t size) +{ + uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t nread = ::read(sockfd, ptr, remaining); + if (nread <= 0) throw std::runtime_error("Socket read failed"); + ptr += nread; + remaining -= nread; + } +} + +// Result retrieval thread +void result_retrieval_thread() +{ + std::cout << "[Server] Result retrieval thread started\n"; + + while (keep_running) { + bool found = false; + + // Scan result queue + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (result_queue[i].ready && !result_queue[i].retrieved) { + found = true; + std::string job_id(result_queue[i].job_id); + + std::cout << "[Server] Result retrieved for job: " << job_id << "\n"; + + // Check if this is a blocking request (thread waiting) + bool is_blocking = false; + { + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it != job_tracker.end()) { is_blocking = it->second.is_blocking; } + } + + if (is_blocking) { + // Synchronous mode - notify specific waiting thread + std::lock_guard lock(waiters_mutex); + auto waiter_it = waiting_threads.find(job_id); + + if (waiter_it != waiting_threads.end()) { + auto waiter = waiter_it->second; + + // Store result and signal THIS specific waiter + { + std::lock_guard result_lock(waiter->mutex); + waiter->result_data.assign(result_queue[i].data, + result_queue[i].data + result_queue[i].data_size); + waiter->result_status = result_queue[i].status; + waiter->ready = true; + waiter->cv.notify_one(); // Wake ONLY this thread + } + + std::cout << "[Server] Notified blocking thread for job: " << job_id << "\n"; + } + } else { + // Asynchronous mode - store in job tracker + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it != job_tracker.end()) { + it->second.status = cuopt::remote::COMPLETED; + it->second.result_status = result_queue[i].status; + it->second.result_data.assign(result_queue[i].data, + result_queue[i].data + result_queue[i].data_size); + } + } + + result_queue[i].retrieved = true; + result_queue[i].ready = false; // Free slot + } + } + + if (!found) { + usleep(50000); // Sleep 50ms + } + } + + std::cout << "[Server] Result retrieval thread stopped\n"; +} + +// Handle job submission (async mode) +cuopt::remote::AsyncResponse handle_submit(const cuopt::remote::AsyncRequest& request) +{ + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::SUBMIT_JOB); + + auto* submit_resp = response.mutable_submit_response(); + + try { + std::string job_id = generate_job_id(); + + // Serialize the job data + std::string job_data; + if (request.has_lp_request()) { + job_data = request.lp_request().SerializeAsString(); + } else if (request.has_mip_request()) { + job_data = request.mip_request().SerializeAsString(); + } else { + submit_resp->set_status(cuopt::remote::ERROR_INVALID_REQUEST); + submit_resp->set_message("No job data provided"); + return response; + } + + if (job_data.size() > 
sizeof(job_queue[0].data)) { + submit_resp->set_status(cuopt::remote::ERROR_INVALID_REQUEST); + submit_resp->set_message("Problem data too large"); + return response; + } + + // Find free job slot + bool queued = false; + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (!job_queue[i].ready) { + strncpy(job_queue[i].job_id, job_id.c_str(), sizeof(job_queue[i].job_id) - 1); + job_queue[i].problem_type = request.has_lp_request() ? 0 : 1; + job_queue[i].data_size = job_data.size(); + std::memcpy(job_queue[i].data, job_data.data(), job_data.size()); + job_queue[i].processed = false; + job_queue[i].ready = true; + queued = true; + break; + } + } + + if (!queued) { + submit_resp->set_status(cuopt::remote::ERROR_INTERNAL); + submit_resp->set_message("Job queue full"); + return response; + } + + // Track job (async mode) + { + std::lock_guard lock(tracker_mutex); + JobInfo info; + info.job_id = job_id; + info.status = cuopt::remote::QUEUED; + info.submit_time = std::chrono::steady_clock::now(); + info.is_blocking = false; // Async mode + job_tracker[job_id] = info; + } + + submit_resp->set_status(cuopt::remote::SUCCESS); + submit_resp->set_job_id(job_id); + submit_resp->set_message("Job queued successfully"); + + std::cout << "[Server] Job submitted (async): " << job_id << "\n"; + + } catch (const std::exception& e) { + submit_resp->set_status(cuopt::remote::ERROR_INTERNAL); + submit_resp->set_message(std::string("Error: ") + e.what()); + } + + return response; +} + +// Handle synchronous (blocking) solve request +cuopt::remote::AsyncResponse handle_sync_solve(const cuopt::remote::AsyncRequest& request) +{ + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::SUBMIT_JOB); // Reuse submit type + + try { + std::string job_id = generate_job_id(); + + std::cout << "[Server] Sync solve request, job_id: " << job_id << "\n"; + + // Serialize the job data + std::string job_data; + bool is_lp = false; + if (request.has_lp_request()) { + job_data = request.lp_request().SerializeAsString(); + is_lp = true; + } else if (request.has_mip_request()) { + job_data = request.mip_request().SerializeAsString(); + is_lp = false; + } else { + auto* error_resp = response.mutable_result_response(); + error_resp->set_status(cuopt::remote::ERROR_INVALID_REQUEST); + error_resp->set_error_message("No job data provided"); + return response; + } + + if (job_data.size() > sizeof(job_queue[0].data)) { + auto* error_resp = response.mutable_result_response(); + error_resp->set_status(cuopt::remote::ERROR_INVALID_REQUEST); + error_resp->set_error_message("Problem data too large"); + return response; + } + + // Create waiter BEFORE submitting job + auto waiter = std::make_shared(); + { + std::lock_guard lock(waiters_mutex); + waiting_threads[job_id] = waiter; + } + + // Submit to job queue + bool queued = false; + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (!job_queue[i].ready) { + strncpy(job_queue[i].job_id, job_id.c_str(), sizeof(job_queue[i].job_id) - 1); + job_queue[i].problem_type = is_lp ? 
0 : 1; + job_queue[i].data_size = job_data.size(); + std::memcpy(job_queue[i].data, job_data.data(), job_data.size()); + job_queue[i].processed = false; + job_queue[i].ready = true; + queued = true; + break; + } + } + + if (!queued) { + std::lock_guard lock(waiters_mutex); + waiting_threads.erase(job_id); + + auto* error_resp = response.mutable_result_response(); + error_resp->set_status(cuopt::remote::ERROR_INTERNAL); + error_resp->set_error_message("Job queue full"); + return response; + } + + // Track job (blocking mode) + { + std::lock_guard lock(tracker_mutex); + JobInfo info; + info.job_id = job_id; + info.status = cuopt::remote::QUEUED; + info.submit_time = std::chrono::steady_clock::now(); + info.is_blocking = true; // Blocking mode! + job_tracker[job_id] = info; + } + + std::cout << "[Server] Job queued (blocking), waiting for result...\n"; + + // WAIT for result using per-job condition variable + { + std::unique_lock lock(waiter->mutex); + waiter->cv.wait(lock, [&waiter] { return waiter->ready; }); + } + + std::cout << "[Server] Job completed, returning result\n"; + + // Result is ready, return it + auto* result_resp = response.mutable_result_response(); + + if (waiter->result_status == 0) { + // Parse and return solution + cuopt::remote::ResultResponse stored_result; + if (stored_result.ParseFromArray(waiter->result_data.data(), waiter->result_data.size())) { + result_resp->set_status(stored_result.status()); + if (stored_result.has_lp_solution()) { + result_resp->mutable_lp_solution()->CopyFrom(stored_result.lp_solution()); + } + if (stored_result.has_mip_solution()) { + result_resp->mutable_mip_solution()->CopyFrom(stored_result.mip_solution()); + } + } else { + result_resp->set_status(cuopt::remote::ERROR_INTERNAL); + result_resp->set_error_message("Failed to parse result"); + } + } else { + result_resp->set_status(cuopt::remote::ERROR_SOLVE_FAILED); + result_resp->set_error_message("Solve failed"); + } + + // Cleanup waiter + { + std::lock_guard lock(waiters_mutex); + waiting_threads.erase(job_id); + } + + // Cleanup job tracker + { + std::lock_guard lock(tracker_mutex); + job_tracker.erase(job_id); + } + + } catch (const std::exception& e) { + auto* error_resp = response.mutable_result_response(); + error_resp->set_status(cuopt::remote::ERROR_INTERNAL); + error_resp->set_error_message(std::string("Error: ") + e.what()); + } + + return response; +} + +// Handle status check +cuopt::remote::AsyncResponse handle_status(const cuopt::remote::AsyncRequest& request) +{ + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::CHECK_STATUS); + + auto* status_resp = response.mutable_status_response(); + + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(request.job_id()); + + if (it == job_tracker.end()) { + status_resp->set_job_status(cuopt::remote::NOT_FOUND); + status_resp->set_message("Job ID not found"); + } else { + status_resp->set_job_status(it->second.status); + + switch (it->second.status) { + case cuopt::remote::QUEUED: status_resp->set_message("Job is queued"); break; + case cuopt::remote::PROCESSING: status_resp->set_message("Job is being processed"); break; + case cuopt::remote::COMPLETED: status_resp->set_message("Job completed"); break; + case cuopt::remote::FAILED: status_resp->set_message("Job failed"); break; + default: status_resp->set_message("Unknown status"); + } + } + + return response; +} + +// Handle result retrieval +cuopt::remote::AsyncResponse handle_get_result(const cuopt::remote::AsyncRequest& request) +{ + 
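+  // Descriptive note: this path serves the asynchronous workflow. It only
+  // reads results that the result-retrieval thread has already copied into
+  // the job tracker and marked COMPLETED; a job that is still queued or
+  // processing is reported back as ERROR_INVALID_REQUEST ("Job not completed yet").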
cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::GET_RESULT); + + auto* result_resp = response.mutable_result_response(); + + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(request.job_id()); + + if (it == job_tracker.end()) { + result_resp->set_status(cuopt::remote::ERROR_NOT_FOUND); + result_resp->set_error_message("Job ID not found"); + return response; + } + + if (it->second.status != cuopt::remote::COMPLETED) { + result_resp->set_status(cuopt::remote::ERROR_INVALID_REQUEST); + result_resp->set_error_message("Job not completed yet"); + return response; + } + + if (it->second.result_status != 0) { + result_resp->set_status(cuopt::remote::ERROR_SOLVE_FAILED); + result_resp->set_error_message("Solve failed"); + return response; + } + + // Parse stored result + cuopt::remote::ResultResponse stored_result; + if (!stored_result.ParseFromArray(it->second.result_data.data(), it->second.result_data.size())) { + result_resp->set_status(cuopt::remote::ERROR_INTERNAL); + result_resp->set_error_message("Failed to parse result"); + return response; + } + + // Copy result + result_resp->set_status(stored_result.status()); + if (stored_result.has_lp_solution()) { + result_resp->mutable_lp_solution()->CopyFrom(stored_result.lp_solution()); + } + if (stored_result.has_mip_solution()) { + result_resp->mutable_mip_solution()->CopyFrom(stored_result.mip_solution()); + } + + std::cout << "[Server] Result retrieved for job: " << request.job_id() << "\n"; + + return response; +} + +// Handle delete +cuopt::remote::AsyncResponse handle_delete(const cuopt::remote::AsyncRequest& request) +{ + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::DELETE_RESULT); + + auto* delete_resp = response.mutable_delete_response(); + + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(request.job_id()); + + if (it == job_tracker.end()) { + delete_resp->set_status(cuopt::remote::ERROR_NOT_FOUND); + delete_resp->set_message("Job ID not found"); + } else { + job_tracker.erase(it); + delete_resp->set_status(cuopt::remote::SUCCESS); + delete_resp->set_message("Job deleted"); + + std::cout << "[Server] Job deleted: " << request.job_id() << "\n"; + } + + return response; +} + +// Handle client connection +void handle_client(int client_socket) +{ + try { + // Read request size + uint32_t request_size; + read_all(client_socket, &request_size, sizeof(request_size)); + + // Read request data + std::vector request_data(request_size); + read_all(client_socket, request_data.data(), request_size); + + // Parse request + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(request_data.data(), request_size)) { + throw std::runtime_error("Failed to parse request"); + } + + // Handle based on request type and blocking flag + cuopt::remote::AsyncResponse response; + + // Check for synchronous (blocking) mode + if (request.request_type() == cuopt::remote::SUBMIT_JOB && request.blocking()) { + // SYNCHRONOUS MODE - handler will block until result ready + std::cout << "[Server] Handling blocking request\n"; + response = handle_sync_solve(request); + } else { + // ASYNCHRONOUS MODE - normal async workflow + switch (request.request_type()) { + case cuopt::remote::SUBMIT_JOB: response = handle_submit(request); break; + case cuopt::remote::CHECK_STATUS: response = handle_status(request); break; + case cuopt::remote::GET_RESULT: response = handle_get_result(request); break; + case cuopt::remote::DELETE_RESULT: response = 
handle_delete(request); break; + default: throw std::runtime_error("Unknown request type"); + } + } + + // Send response (socket kept open during wait for blocking requests) + std::string response_data = response.SerializeAsString(); + uint32_t response_size = static_cast(response_data.size()); + + write_all(client_socket, &response_size, sizeof(response_size)); + write_all(client_socket, response_data.data(), response_data.size()); + + } catch (const std::exception& e) { + std::cerr << "[Server] Error handling client: " << e.what() << "\n"; + } + + close(client_socket); +} + +int main(int argc, char* argv[]) +{ + GOOGLE_PROTOBUF_VERIFY_VERSION; + + int port = 9999; + if (argc > 1) { + port = std::atoi(argv[1]); + if (port <= 0 || port > 65535) { + std::cerr << "Error: Invalid port\n"; + return 1; + } + } + + std::cout << "==========================================================\n"; + std::cout << "cuOpt Async Remote Solve Server\n"; + std::cout << "==========================================================\n"; + std::cout << "Port: " << port << "\n"; + std::cout << "Press Ctrl+C to stop\n"; + std::cout << "==========================================================\n\n"; + + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + // Create shared memory for job queue + shm_unlink("/cuopt_job_queue"); + int job_shm_fd = shm_open("/cuopt_job_queue", O_CREAT | O_RDWR, 0666); + if (job_shm_fd == -1) { + std::cerr << "Error: Failed to create job queue\n"; + return 1; + } + ftruncate(job_shm_fd, sizeof(JobQueueEntry) * MAX_JOBS); + + job_queue = static_cast(mmap( + nullptr, sizeof(JobQueueEntry) * MAX_JOBS, PROT_READ | PROT_WRITE, MAP_SHARED, job_shm_fd, 0)); + + if (job_queue == MAP_FAILED) { + std::cerr << "Error: Failed to map job queue\n"; + return 1; + } + + // Initialize job queue + std::memset(job_queue, 0, sizeof(JobQueueEntry) * MAX_JOBS); + + // Create shared memory for result queue + shm_unlink("/cuopt_result_queue"); + int result_shm_fd = shm_open("/cuopt_result_queue", O_CREAT | O_RDWR, 0666); + if (result_shm_fd == -1) { + std::cerr << "Error: Failed to create result queue\n"; + return 1; + } + ftruncate(result_shm_fd, sizeof(ResultQueueEntry) * MAX_RESULTS); + + result_queue = static_cast(mmap(nullptr, + sizeof(ResultQueueEntry) * MAX_RESULTS, + PROT_READ | PROT_WRITE, + MAP_SHARED, + result_shm_fd, + 0)); + + if (result_queue == MAP_FAILED) { + std::cerr << "Error: Failed to map result queue\n"; + return 1; + } + + // Initialize result queue + std::memset(result_queue, 0, sizeof(ResultQueueEntry) * MAX_RESULTS); + + std::cout << "[Server] Shared memory queues created\n"; + + // Spawn solver worker + worker_pid = fork(); + if (worker_pid == 0) { + // Child process - become worker + + // First, try to find worker in same directory as this executable + char self_path[1024]; + ssize_t len = readlink("/proc/self/exe", self_path, sizeof(self_path) - 1); + if (len != -1) { + self_path[len] = '\0'; + // Find last '/' to get directory + char* last_slash = strrchr(self_path, '/'); + if (last_slash) { + *(last_slash + 1) = '\0'; // Truncate to directory + strcat(self_path, "cuopt_solver_worker"); + execl(self_path, "cuopt_solver_worker", nullptr); + } + } + + // Try PATH search + execlp("cuopt_solver_worker", "cuopt_solver_worker", nullptr); + + // Try other common locations + const char* worker_paths[] = {"/usr/local/bin/cuopt_solver_worker", + "/usr/bin/cuopt_solver_worker", + "./cuopt_solver_worker", + nullptr}; + + for (int i = 0; worker_paths[i] != nullptr; ++i) { + 
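+      // execl() only returns on failure, so each iteration that falls through
+      // here simply tries the next well-known location for the worker binary.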
execl(worker_paths[i], "cuopt_solver_worker", nullptr); + } + + std::cerr << "Error: Failed to exec worker\n"; + std::cerr << "Searched: /proc/self/exe directory, PATH, and standard locations\n"; + exit(1); + } else if (worker_pid < 0) { + std::cerr << "Error: Failed to fork worker\n"; + return 1; + } + + std::cout << "[Server] Solver worker started (PID: " << worker_pid << ")\n"; + + // Start result retrieval thread + std::thread result_thread(result_retrieval_thread); + + // Create socket + int server_socket = socket(AF_INET, SOCK_STREAM, 0); + if (server_socket < 0) { + std::cerr << "Error: Failed to create socket\n"; + return 1; + } + + int opt = 1; + setsockopt(server_socket, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + + struct sockaddr_in server_addr; + std::memset(&server_addr, 0, sizeof(server_addr)); + server_addr.sin_family = AF_INET; + server_addr.sin_addr.s_addr = INADDR_ANY; + server_addr.sin_port = htons(port); + + if (bind(server_socket, (struct sockaddr*)&server_addr, sizeof(server_addr)) < 0) { + std::cerr << "Error: Failed to bind\n"; + return 1; + } + + if (listen(server_socket, 5) < 0) { + std::cerr << "Error: Failed to listen\n"; + return 1; + } + + std::cout << "[Server] Listening on port " << port << "...\n\n"; + + // Accept loop + while (keep_running) { + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + setsockopt(server_socket, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + + struct sockaddr_in client_addr; + socklen_t client_len = sizeof(client_addr); + + int client_socket = accept(server_socket, (struct sockaddr*)&client_addr, &client_len); + + if (client_socket < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) continue; + if (keep_running) std::cerr << "[Server] Warning: Failed to accept\n"; + continue; + } + + char client_ip[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, &client_addr.sin_addr, client_ip, INET_ADDRSTRLEN); + std::cout << "[Server] Connection from " << client_ip << "\n"; + + handle_client(client_socket); + } + + // Cleanup + std::cout << "[Server] Shutting down...\n"; + + close(server_socket); + result_thread.join(); + + if (worker_pid > 0) { + kill(worker_pid, SIGTERM); + waitpid(worker_pid, nullptr, 0); + std::cout << "[Server] Worker process stopped\n"; + } + + munmap(job_queue, sizeof(JobQueueEntry) * MAX_JOBS); + munmap(result_queue, sizeof(ResultQueueEntry) * MAX_RESULTS); + close(job_shm_fd); + close(result_shm_fd); + shm_unlink("/cuopt_job_queue"); + shm_unlink("/cuopt_result_queue"); + + google::protobuf::ShutdownProtobufLibrary(); + + std::cout << "[Server] Stopped\n"; + return 0; +} diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index a9aeaef77..4ce93cd00 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -118,7 +118,7 @@ int run_single_file(const std::string& file_path, } auto op_problem = - cuopt::linear_programming::mps_data_model_to_optimization_problem(&handle_, mps_data_model); + cuopt::linear_programming::mps_data_model_to_optimization_problem(mps_data_model); const bool is_mip = (op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::MIP || diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp new file mode 100644 index 000000000..0b1dfc145 --- /dev/null +++ b/cpp/cuopt_remote_server.cpp @@ -0,0 +1,457 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. 
SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file cuopt_remote_server.cpp + * @brief Remote solve server for cuOpt using Protocol Buffers + * + * This server listens for TCP connections and solves optimization problems + * sent from remote clients using Protocol Buffers serialization. + */ + +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +// Global flag for graceful shutdown +volatile sig_atomic_t keep_running = 1; + +void signal_handler(int signal) +{ + if (signal == SIGINT || signal == SIGTERM) { + std::cout << "\n[Server] Received shutdown signal, cleaning up...\n"; + keep_running = 0; + } +} + +// Helper to write data to socket +static void write_all(int sockfd, const void* data, size_t size) +{ + const uint8_t* ptr = static_cast(data); + size_t remaining = size; + + while (remaining > 0) { + ssize_t written = ::write(sockfd, ptr, remaining); + if (written <= 0) { throw std::runtime_error("Socket write failed"); } + ptr += written; + remaining -= written; + } +} + +// Helper to read data from socket +static void read_all(int sockfd, void* data, size_t size) +{ + uint8_t* ptr = static_cast(data); + size_t remaining = size; + + while (remaining > 0) { + ssize_t nread = ::read(sockfd, ptr, remaining); + if (nread <= 0) { throw std::runtime_error("Socket read failed"); } + ptr += nread; + remaining -= nread; + } +} + +// Convert protobuf OptimizationProblem to optimization_problem_t +template +static cuopt::linear_programming::optimization_problem_t protobuf_to_problem( + const cuopt::remote::OptimizationProblem& pb_problem) +{ + cuopt::linear_programming::optimization_problem_t problem; + + // Set problem sense + problem.set_maximize(pb_problem.maximize()); + problem.set_objective_scaling_factor(static_cast(pb_problem.objective_scaling_factor())); + problem.set_objective_offset(static_cast(pb_problem.objective_offset())); + + // Convert constraint matrix + std::vector matrix_values; + std::vector matrix_indices; + std::vector matrix_offsets; + + matrix_values.reserve(pb_problem.constraint_matrix_values_size()); + for (int i = 0; i < pb_problem.constraint_matrix_values_size(); ++i) { + matrix_values.push_back(static_cast(pb_problem.constraint_matrix_values(i))); + } + + matrix_indices.reserve(pb_problem.constraint_matrix_indices_size()); + for (int i = 0; i < pb_problem.constraint_matrix_indices_size(); ++i) { + matrix_indices.push_back(static_cast(pb_problem.constraint_matrix_indices(i))); + } + + matrix_offsets.reserve(pb_problem.constraint_matrix_offsets_size()); + for (int i = 0; i < pb_problem.constraint_matrix_offsets_size(); ++i) { + matrix_offsets.push_back(static_cast(pb_problem.constraint_matrix_offsets(i))); + } + + problem.set_csr_constraint_matrix(matrix_values.data(), + matrix_values.size(), + matrix_indices.data(), + matrix_indices.size(), + matrix_offsets.data(), + matrix_offsets.size()); + + // Convert problem vectors + std::vector obj_coeffs; + 
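+  // The protobuf message carries these fields as repeated double/int64 values;
+  // they are copied element-wise below so they can be narrowed to f_t/i_t before
+  // being handed to the problem setters, which copy the host buffers themselves.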
std::vector constraint_bounds; + std::vector var_lower; + std::vector var_upper; + + obj_coeffs.reserve(pb_problem.objective_coefficients_size()); + for (int i = 0; i < pb_problem.objective_coefficients_size(); ++i) { + obj_coeffs.push_back(static_cast(pb_problem.objective_coefficients(i))); + } + + constraint_bounds.reserve(pb_problem.constraint_bounds_size()); + for (int i = 0; i < pb_problem.constraint_bounds_size(); ++i) { + constraint_bounds.push_back(static_cast(pb_problem.constraint_bounds(i))); + } + + var_lower.reserve(pb_problem.variable_lower_bounds_size()); + for (int i = 0; i < pb_problem.variable_lower_bounds_size(); ++i) { + var_lower.push_back(static_cast(pb_problem.variable_lower_bounds(i))); + } + + var_upper.reserve(pb_problem.variable_upper_bounds_size()); + for (int i = 0; i < pb_problem.variable_upper_bounds_size(); ++i) { + var_upper.push_back(static_cast(pb_problem.variable_upper_bounds(i))); + } + + problem.set_objective_coefficients(obj_coeffs.data(), obj_coeffs.size()); + problem.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + problem.set_variable_lower_bounds(var_lower.data(), var_lower.size()); + problem.set_variable_upper_bounds(var_upper.data(), var_upper.size()); + + // Constraint lower/upper bounds (if provided) + if (pb_problem.constraint_lower_bounds_size() > 0) { + std::vector constraint_lower; + constraint_lower.reserve(pb_problem.constraint_lower_bounds_size()); + for (int i = 0; i < pb_problem.constraint_lower_bounds_size(); ++i) { + constraint_lower.push_back(static_cast(pb_problem.constraint_lower_bounds(i))); + } + problem.set_constraint_lower_bounds(constraint_lower.data(), constraint_lower.size()); + } + + if (pb_problem.constraint_upper_bounds_size() > 0) { + std::vector constraint_upper; + constraint_upper.reserve(pb_problem.constraint_upper_bounds_size()); + for (int i = 0; i < pb_problem.constraint_upper_bounds_size(); ++i) { + constraint_upper.push_back(static_cast(pb_problem.constraint_upper_bounds(i))); + } + problem.set_constraint_upper_bounds(constraint_upper.data(), constraint_upper.size()); + } + + // Row types (if provided) + if (!pb_problem.row_types().empty()) { + const std::string& rt = pb_problem.row_types(); + problem.set_row_types(rt.data(), rt.size()); + } + + return problem; +} + +// Convert LP solution to protobuf +template +static void lp_solution_to_protobuf( + cuopt::linear_programming::optimization_problem_solution_t& solution, + cuopt::remote::LPSolution* pb_solution) +{ + // Solution vectors + for (const auto& val : solution.get_primal_solution()) { + pb_solution->add_primal_solution(static_cast(val)); + } + for (const auto& val : solution.get_dual_solution()) { + pb_solution->add_dual_solution(static_cast(val)); + } + for (const auto& val : solution.get_reduced_cost()) { + pb_solution->add_reduced_cost(static_cast(val)); + } + + // Termination status + pb_solution->set_termination_status( + static_cast(solution.get_termination_status())); + + // Solution statistics + const auto& stats = solution.get_additional_termination_information(); + pb_solution->set_l2_primal_residual(stats.l2_primal_residual); + pb_solution->set_l2_dual_residual(stats.l2_dual_residual); + pb_solution->set_primal_objective(stats.primal_objective); + pb_solution->set_dual_objective(stats.dual_objective); + pb_solution->set_gap(stats.gap); + pb_solution->set_nb_iterations(stats.number_of_steps_taken); + pb_solution->set_solve_time(stats.solve_time); + pb_solution->set_solved_by_pdlp(stats.solved_by_pdlp); + + // Warm 
start data + const auto& ws = solution.get_pdlp_warm_start_data(); + auto* pb_ws = pb_solution->mutable_warm_start_data(); + + for (const auto& val : ws.current_primal_solution_) { + pb_ws->add_current_primal_solution(static_cast(val)); + } + for (const auto& val : ws.current_dual_solution_) { + pb_ws->add_current_dual_solution(static_cast(val)); + } + for (const auto& val : ws.initial_primal_average_) { + pb_ws->add_initial_primal_average(static_cast(val)); + } + for (const auto& val : ws.initial_dual_average_) { + pb_ws->add_initial_dual_average(static_cast(val)); + } + for (const auto& val : ws.current_ATY_) { + pb_ws->add_current_aty(static_cast(val)); + } + for (const auto& val : ws.sum_primal_solutions_) { + pb_ws->add_sum_primal_solutions(static_cast(val)); + } + for (const auto& val : ws.sum_dual_solutions_) { + pb_ws->add_sum_dual_solutions(static_cast(val)); + } + for (const auto& val : ws.last_restart_duality_gap_primal_solution_) { + pb_ws->add_last_restart_duality_gap_primal_solution(static_cast(val)); + } + for (const auto& val : ws.last_restart_duality_gap_dual_solution_) { + pb_ws->add_last_restart_duality_gap_dual_solution(static_cast(val)); + } + + pb_ws->set_initial_primal_weight(static_cast(ws.initial_primal_weight_)); + pb_ws->set_initial_step_size(static_cast(ws.initial_step_size_)); + pb_ws->set_total_pdlp_iterations(ws.total_pdlp_iterations_); + pb_ws->set_total_pdhg_iterations(ws.total_pdhg_iterations_); + pb_ws->set_last_candidate_kkt_score(static_cast(ws.last_candidate_kkt_score_)); + pb_ws->set_last_restart_kkt_score(static_cast(ws.last_restart_kkt_score_)); + pb_ws->set_sum_solution_weight(static_cast(ws.sum_solution_weight_)); + pb_ws->set_iterations_since_last_restart(ws.iterations_since_last_restart_); +} + +// Handle a single client connection +void handle_client(int client_socket) +{ + try { + std::cout << "[Server] Client connected\n"; + + // Read request size + uint32_t request_size; + read_all(client_socket, &request_size, sizeof(request_size)); + + std::cout << "[Server] Receiving request (" << request_size << " bytes)...\n"; + + // Read request data + std::vector request_data(request_size); + read_all(client_socket, request_data.data(), request_size); + + // Parse request + cuopt::remote::SolveLPRequest lp_request; + cuopt::remote::SolveMIPRequest mip_request; + + // Try LP first + bool is_lp = lp_request.ParseFromArray(request_data.data(), request_size); + bool is_mip = false; + + if (!is_lp) { + // Try MIP + is_mip = mip_request.ParseFromArray(request_data.data(), request_size); + } + + if (!is_lp && !is_mip) { throw std::runtime_error("Failed to parse request as LP or MIP"); } + + // Create response + cuopt::remote::SolveResponse response; + response.set_status(cuopt::remote::SUCCESS); + + if (is_lp) { + std::cout << "[Server] Processing LP request\n"; + std::cout << " Version: " << lp_request.header().version() << "\n"; + std::cout << " Variables: " << lp_request.problem().objective_coefficients_size() << "\n"; + std::cout << " Constraints: " << (lp_request.problem().constraint_matrix_offsets_size() - 1) + << "\n"; + + // Convert problem + auto problem = protobuf_to_problem(lp_request.problem()); + + // Solve LP + std::cout << "[Server] Solving LP problem...\n"; + cuopt::linear_programming::pdlp_solver_settings_t settings; + auto solution = cuopt::linear_programming::solve_lp(problem, settings); + + std::cout << "[Server] LP solve completed\n"; + std::cout << " Status: " << static_cast(solution.get_termination_status()) << "\n"; + std::cout << " 
Objective: " << solution.get_objective_value() << "\n"; + + // Convert solution to protobuf + lp_solution_to_protobuf(solution, response.mutable_lp_solution()); + + } else if (is_mip) { + std::cout << "[Server] Processing MIP request\n"; + + // MIP not yet implemented with protobuf + response.set_status(cuopt::remote::ERROR_INTERNAL); + response.set_error_message("MIP solving not yet implemented with protobuf"); + } + + // Serialize response + std::string response_data = response.SerializeAsString(); + uint32_t response_size = static_cast(response_data.size()); + + std::cout << "[Server] Sending response (" << response_size << " bytes)...\n"; + + // Send response size and data + write_all(client_socket, &response_size, sizeof(response_size)); + write_all(client_socket, response_data.data(), response_data.size()); + + std::cout << "[Server] Response sent successfully\n"; + + } catch (const std::exception& e) { + std::cerr << "[Server] Error handling client: " << e.what() << "\n"; + + // Try to send error response + try { + cuopt::remote::SolveResponse response; + response.set_status(cuopt::remote::ERROR_INTERNAL); + response.set_error_message(e.what()); + + std::string response_data = response.SerializeAsString(); + uint32_t response_size = static_cast(response_data.size()); + + write_all(client_socket, &response_size, sizeof(response_size)); + write_all(client_socket, response_data.data(), response_data.size()); + } catch (...) { + // Ignore write errors during error handling + } + } + + close(client_socket); + std::cout << "[Server] Client disconnected\n\n"; +} + +int main(int argc, char* argv[]) +{ + // Verify protobuf version + GOOGLE_PROTOBUF_VERIFY_VERSION; + + // Parse command line arguments + int port = 9999; // Default port + + if (argc > 1) { + port = std::atoi(argv[1]); + if (port <= 0 || port > 65535) { + std::cerr << "Error: Invalid port number: " << argv[1] << "\n"; + std::cerr << "Usage: " << argv[0] << " [port]\n"; + std::cerr << " Default port: 9999\n"; + return 1; + } + } + + std::cout << "==========================================================\n"; + std::cout << "cuOpt Remote Solve Server (Protocol Buffers)\n"; + std::cout << "==========================================================\n"; + std::cout << "Port: " << port << "\n"; + std::cout << "Press Ctrl+C to stop\n"; + std::cout << "==========================================================\n\n"; + + // Setup signal handlers for graceful shutdown + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + // Create socket + int server_socket = socket(AF_INET, SOCK_STREAM, 0); + if (server_socket < 0) { + std::cerr << "Error: Failed to create socket\n"; + return 1; + } + + // Set socket options + int opt = 1; + if (setsockopt(server_socket, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) { + std::cerr << "Warning: Failed to set SO_REUSEADDR\n"; + } + + // Bind to port + struct sockaddr_in server_addr; + std::memset(&server_addr, 0, sizeof(server_addr)); + server_addr.sin_family = AF_INET; + server_addr.sin_addr.s_addr = INADDR_ANY; + server_addr.sin_port = htons(port); + + if (bind(server_socket, (struct sockaddr*)&server_addr, sizeof(server_addr)) < 0) { + std::cerr << "Error: Failed to bind to port " << port << "\n"; + std::cerr << "Port may already be in use\n"; + close(server_socket); + return 1; + } + + // Listen for connections + if (listen(server_socket, 5) < 0) { + std::cerr << "Error: Failed to listen on socket\n"; + close(server_socket); + return 1; + } + + std::cout << "[Server] Listening on 
port " << port << "...\n\n"; + + // Main server loop + while (keep_running) { + // Set timeout for accept to allow checking keep_running + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + setsockopt(server_socket, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + + // Accept client connection + struct sockaddr_in client_addr; + socklen_t client_len = sizeof(client_addr); + + int client_socket = accept(server_socket, (struct sockaddr*)&client_addr, &client_len); + + if (client_socket < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // Timeout, check keep_running and continue + continue; + } + if (keep_running) { std::cerr << "[Server] Warning: Failed to accept connection\n"; } + continue; + } + + // Get client IP + char client_ip[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, &client_addr.sin_addr, client_ip, INET_ADDRSTRLEN); + std::cout << "[Server] Connection from " << client_ip << ":" << ntohs(client_addr.sin_port) + << "\n"; + + // Handle client (blocking - single-threaded for simplicity) + handle_client(client_socket); + } + + // Cleanup + close(server_socket); + std::cout << "[Server] Server stopped\n"; + + // Cleanup protobuf + google::protobuf::ShutdownProtobufLibrary(); + + return 0; +} diff --git a/cpp/cuopt_solver_worker.cpp b/cpp/cuopt_solver_worker.cpp new file mode 100644 index 000000000..9606dede8 --- /dev/null +++ b/cpp/cuopt_solver_worker.cpp @@ -0,0 +1,402 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file cuopt_solver_worker.cpp + * @brief Solver worker process for async job queue + * + * This worker: + * - Reads solve jobs from a shared memory queue + * - Solves problems using GPU + * - Writes results back to result queue + * - Runs in a separate process for isolation + */ + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +// Shared memory queue structure +struct JobQueueEntry { + char job_id[64]; + uint32_t problem_type; // 0 = LP, 1 = MIP + uint32_t data_size; + uint8_t data[1024 * 1024]; // 1MB buffer + bool ready; + bool processed; +}; + +struct ResultQueueEntry { + char job_id[64]; + uint32_t status; // 0 = success, 1 = error + uint32_t data_size; + uint8_t data[2 * 1024 * 1024]; // 2MB buffer for results + bool ready; + bool retrieved; +}; + +const size_t MAX_JOBS = 100; +const size_t MAX_RESULTS = 100; + +// Global flag for graceful shutdown +volatile sig_atomic_t keep_running = 1; + +void signal_handler(int signal) +{ + if (signal == SIGINT || signal == SIGTERM) { + std::cout << "[Worker] Received shutdown signal\n"; + keep_running = 0; + } +} + +// Convert protobuf OptimizationProblem to optimization_problem_t +template +static cuopt::linear_programming::optimization_problem_t protobuf_to_problem( + const cuopt::remote::OptimizationProblem& pb_problem) +{ + cuopt::linear_programming::optimization_problem_t problem; + + problem.set_maximize(pb_problem.maximize()); + problem.set_objective_scaling_factor(static_cast(pb_problem.objective_scaling_factor())); + problem.set_objective_offset(static_cast(pb_problem.objective_offset())); + + std::vector matrix_values; + std::vector matrix_indices; + std::vector matrix_offsets; + + for (int i = 0; i < pb_problem.constraint_matrix_values_size(); ++i) { + matrix_values.push_back(static_cast(pb_problem.constraint_matrix_values(i))); + } + for (int i = 0; i < pb_problem.constraint_matrix_indices_size(); ++i) { + matrix_indices.push_back(static_cast(pb_problem.constraint_matrix_indices(i))); + } + for (int i = 0; i < pb_problem.constraint_matrix_offsets_size(); ++i) { + matrix_offsets.push_back(static_cast(pb_problem.constraint_matrix_offsets(i))); + } + + problem.set_csr_constraint_matrix(matrix_values.data(), + matrix_values.size(), + matrix_indices.data(), + matrix_indices.size(), + matrix_offsets.data(), + matrix_offsets.size()); + + std::vector obj_coeffs, constraint_bounds, var_lower, var_upper; + + for (int i = 0; i < pb_problem.objective_coefficients_size(); ++i) { + obj_coeffs.push_back(static_cast(pb_problem.objective_coefficients(i))); + } + for (int i = 0; i < pb_problem.constraint_bounds_size(); ++i) { + constraint_bounds.push_back(static_cast(pb_problem.constraint_bounds(i))); + } + for (int i = 0; i < pb_problem.variable_lower_bounds_size(); ++i) { + var_lower.push_back(static_cast(pb_problem.variable_lower_bounds(i))); + } + for (int i = 0; i < pb_problem.variable_upper_bounds_size(); ++i) { + var_upper.push_back(static_cast(pb_problem.variable_upper_bounds(i))); + } + + problem.set_objective_coefficients(obj_coeffs.data(), obj_coeffs.size()); + problem.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + problem.set_variable_lower_bounds(var_lower.data(), var_lower.size()); + problem.set_variable_upper_bounds(var_upper.data(), var_upper.size()); + + if (pb_problem.constraint_lower_bounds_size() > 0) { + std::vector constraint_lower; + for (int i = 0; i < pb_problem.constraint_lower_bounds_size(); ++i) { + 
constraint_lower.push_back(static_cast(pb_problem.constraint_lower_bounds(i))); + } + problem.set_constraint_lower_bounds(constraint_lower.data(), constraint_lower.size()); + } + + if (pb_problem.constraint_upper_bounds_size() > 0) { + std::vector constraint_upper; + for (int i = 0; i < pb_problem.constraint_upper_bounds_size(); ++i) { + constraint_upper.push_back(static_cast(pb_problem.constraint_upper_bounds(i))); + } + problem.set_constraint_upper_bounds(constraint_upper.data(), constraint_upper.size()); + } + + if (!pb_problem.row_types().empty()) { + const std::string& rt = pb_problem.row_types(); + problem.set_row_types(rt.data(), rt.size()); + } + + return problem; +} + +// Convert LP solution to protobuf +template +static void lp_solution_to_protobuf( + cuopt::linear_programming::optimization_problem_solution_t& solution, + cuopt::remote::LPSolution* pb_solution) +{ + for (const auto& val : solution.get_primal_solution()) { + pb_solution->add_primal_solution(static_cast(val)); + } + for (const auto& val : solution.get_dual_solution()) { + pb_solution->add_dual_solution(static_cast(val)); + } + for (const auto& val : solution.get_reduced_cost()) { + pb_solution->add_reduced_cost(static_cast(val)); + } + + pb_solution->set_termination_status( + static_cast(solution.get_termination_status())); + + const auto& stats = solution.get_additional_termination_information(); + pb_solution->set_primal_objective(stats.primal_objective); + pb_solution->set_dual_objective(stats.dual_objective); + pb_solution->set_solve_time(stats.solve_time); + pb_solution->set_l2_primal_residual(stats.l2_primal_residual); + pb_solution->set_l2_dual_residual(stats.l2_dual_residual); + pb_solution->set_gap(stats.gap); + pb_solution->set_nb_iterations(stats.number_of_steps_taken); + pb_solution->set_solved_by_pdlp(stats.solved_by_pdlp); +} + +// Convert MIP solution to protobuf +template +static void mip_solution_to_protobuf(cuopt::linear_programming::mip_solution_t& solution, + cuopt::remote::MIPSolution* pb_solution) +{ + for (const auto& val : solution.get_solution()) { + pb_solution->add_solution(static_cast(val)); + } + + pb_solution->set_termination_status( + static_cast(solution.get_termination_status())); + pb_solution->set_objective(solution.get_objective_value()); + pb_solution->set_solution_bound(solution.get_solution_bound()); + pb_solution->set_total_solve_time(solution.get_total_solve_time()); + pb_solution->set_presolve_time(solution.get_presolve_time()); + pb_solution->set_mip_gap(solution.get_mip_gap()); + pb_solution->set_max_constraint_violation(solution.get_max_constraint_violation()); + pb_solution->set_max_int_violation(solution.get_max_int_violation()); + pb_solution->set_max_variable_bound_violation(solution.get_max_variable_bound_violation()); + pb_solution->set_nodes(solution.get_num_nodes()); + pb_solution->set_simplex_iterations(solution.get_num_simplex_iterations()); +} + +int main(int argc, char* argv[]) +{ + GOOGLE_PROTOBUF_VERIFY_VERSION; + + std::cout << "==========================================================\n"; + std::cout << "cuOpt Solver Worker Process\n"; + std::cout << "==========================================================\n"; + + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + // Open shared memory for job queue + int job_shm_fd = shm_open("/cuopt_job_queue", O_RDWR, 0666); + if (job_shm_fd == -1) { + std::cerr << "[Worker] Error: Failed to open job queue shared memory\n"; + return 1; + } + + JobQueueEntry* job_queue = static_cast(mmap( + 
nullptr, sizeof(JobQueueEntry) * MAX_JOBS, PROT_READ | PROT_WRITE, MAP_SHARED, job_shm_fd, 0)); + + if (job_queue == MAP_FAILED) { + std::cerr << "[Worker] Error: Failed to map job queue\n"; + close(job_shm_fd); + return 1; + } + + // Open shared memory for result queue + int result_shm_fd = shm_open("/cuopt_result_queue", O_RDWR, 0666); + if (result_shm_fd == -1) { + std::cerr << "[Worker] Error: Failed to open result queue shared memory\n"; + munmap(job_queue, sizeof(JobQueueEntry) * MAX_JOBS); + close(job_shm_fd); + return 1; + } + + ResultQueueEntry* result_queue = + static_cast(mmap(nullptr, + sizeof(ResultQueueEntry) * MAX_RESULTS, + PROT_READ | PROT_WRITE, + MAP_SHARED, + result_shm_fd, + 0)); + + if (result_queue == MAP_FAILED) { + std::cerr << "[Worker] Error: Failed to map result queue\n"; + munmap(job_queue, sizeof(JobQueueEntry) * MAX_JOBS); + close(job_shm_fd); + close(result_shm_fd); + return 1; + } + + std::cout << "[Worker] Connected to shared memory queues\n"; + std::cout << "[Worker] Waiting for jobs...\n\n"; + + // Main worker loop + while (keep_running) { + // Scan job queue for ready jobs + bool found_job = false; + + for (size_t i = 0; i < MAX_JOBS && keep_running; ++i) { + if (job_queue[i].ready && !job_queue[i].processed) { + found_job = true; + std::string job_id(job_queue[i].job_id); + + std::cout << "[Worker] Processing job: " << job_id + << " (type: " << (job_queue[i].problem_type == 0 ? "LP" : "MIP") << ")\n"; + + try { + cuopt::remote::ResultResponse result_response; + result_response.set_status(cuopt::remote::SUCCESS); + + if (job_queue[i].problem_type == 0) { + // LP problem + cuopt::remote::SolveLPRequest lp_request; + if (!lp_request.ParseFromArray(job_queue[i].data, job_queue[i].data_size)) { + throw std::runtime_error("Failed to parse LP request"); + } + + auto problem = protobuf_to_problem(lp_request.problem()); + + std::cout << "[Worker] Solving LP: " << problem.get_n_variables() << " vars, " + << problem.get_n_constraints() << " constraints\n"; + + cuopt::linear_programming::pdlp_solver_settings_t settings; + auto solution = cuopt::linear_programming::solve_lp(problem, settings); + + std::cout << "[Worker] LP solve completed, status: " + << static_cast(solution.get_termination_status()) << "\n"; + + lp_solution_to_protobuf(solution, result_response.mutable_lp_solution()); + + } else if (job_queue[i].problem_type == 1) { + // MIP problem + cuopt::remote::SolveMIPRequest mip_request; + if (!mip_request.ParseFromArray(job_queue[i].data, job_queue[i].data_size)) { + throw std::runtime_error("Failed to parse MIP request"); + } + + auto problem = protobuf_to_problem(mip_request.problem()); + + // Set variable types from is_integer and is_binary fields + const auto& pb_problem = mip_request.problem(); + std::cout << "[Worker] MIP problem has " << pb_problem.is_integer_size() + << " is_integer entries, " << pb_problem.is_binary_size() + << " is_binary entries\n"; + if (pb_problem.is_integer_size() > 0 || pb_problem.is_binary_size() > 0) { + int n_vars = problem.get_n_variables(); + std::vector var_types( + n_vars, cuopt::linear_programming::var_t::CONTINUOUS); + + for (int j = 0; j < pb_problem.is_integer_size(); ++j) { + if (pb_problem.is_integer(j)) { + var_types[j] = cuopt::linear_programming::var_t::INTEGER; + } + } + for (int j = 0; j < pb_problem.is_binary_size(); ++j) { + if (pb_problem.is_binary(j)) { + var_types[j] = cuopt::linear_programming::var_t::INTEGER; + } + } + + problem.set_variable_types(var_types.data(), var_types.size()); + } + + 
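+          // Note: both is_integer and is_binary entries map to var_t::INTEGER
+          // above; the 0/1 bounds for binary variables are expected to arrive
+          // via variable_lower_bounds / variable_upper_bounds in the problem data.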
std::cout << "[Worker] Solving MIP: " << problem.get_n_variables() << " vars, " + << problem.get_n_constraints() << " constraints\n"; + + cuopt::linear_programming::mip_solver_settings_t settings; + auto solution = cuopt::linear_programming::solve_mip(problem, settings); + + std::cout << "[Worker] MIP solve completed, status: " + << static_cast(solution.get_termination_status()) + << ", objective: " << solution.get_objective_value() << "\n"; + + mip_solution_to_protobuf(solution, result_response.mutable_mip_solution()); + + } else { + throw std::runtime_error("Unknown problem type: " + + std::to_string(job_queue[i].problem_type)); + } + + std::string result_data = result_response.SerializeAsString(); + + // Find free result slot + bool stored = false; + for (size_t j = 0; j < MAX_RESULTS; ++j) { + if (!result_queue[j].ready) { + strncpy(result_queue[j].job_id, job_id.c_str(), sizeof(result_queue[j].job_id) - 1); + result_queue[j].status = 0; + result_queue[j].data_size = result_data.size(); + std::memcpy(result_queue[j].data, result_data.data(), result_data.size()); + result_queue[j].retrieved = false; + result_queue[j].ready = true; + stored = true; + break; + } + } + + if (!stored) { std::cerr << "[Worker] Warning: Result queue full, result may be lost\n"; } + + } catch (const std::exception& e) { + std::cerr << "[Worker] Error solving job " << job_id << ": " << e.what() << "\n"; + + // Store error result + for (size_t j = 0; j < MAX_RESULTS; ++j) { + if (!result_queue[j].ready) { + strncpy(result_queue[j].job_id, job_id.c_str(), sizeof(result_queue[j].job_id) - 1); + result_queue[j].status = 1; // Error + result_queue[j].data_size = 0; + result_queue[j].retrieved = false; + result_queue[j].ready = true; + break; + } + } + } + + // Mark job as processed + job_queue[i].processed = true; + } + } + + if (!found_job) { + usleep(100000); // Sleep 100ms if no jobs + } + } + + std::cout << "[Worker] Shutting down...\n"; + + munmap(job_queue, sizeof(JobQueueEntry) * MAX_JOBS); + munmap(result_queue, sizeof(ResultQueueEntry) * MAX_RESULTS); + close(job_shm_fd); + close(result_shm_fd); + + google::protobuf::ShutdownProtobufLibrary(); + + std::cout << "[Worker] Stopped\n"; + return 0; +} diff --git a/cpp/include/cuopt/linear_programming/gpu_optimization_problem.hpp b/cpp/include/cuopt/linear_programming/gpu_optimization_problem.hpp new file mode 100644 index 000000000..ae02df7b9 --- /dev/null +++ b/cpp/include/cuopt/linear_programming/gpu_optimization_problem.hpp @@ -0,0 +1,427 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & + * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming { + +// NOTE: These enum definitions are duplicated from optimization_problem.hpp +// They MUST remain identical. 
In the future, these should be extracted to a common header. +#ifndef CUOPT_VAR_T_DEFINED +#define CUOPT_VAR_T_DEFINED +enum class var_t { CONTINUOUS = 0, INTEGER }; +enum class problem_category_t : int8_t { LP = 0, MIP = 1, IP = 2 }; +#endif + +/** + * @brief GPU-based representation of a linear programming (LP) optimization problem + * + * This is an internal solver representation that stores all problem data on the GPU + * using rmm::device_uvector. This class is used internally by solvers and should not + * be directly exposed to users. + * + * @tparam i_t Integer type for indexes + * @tparam f_t Floating point type for values + * + * This structure stores all the information necessary to represent the + * following LP: + * + *
+ * Minimize:
+ *   dot(c, x)
+ * Subject to:
+ *   matmul(A, x) (= or >= or <=) b
+ * Where:
+ *   x = n-dim vector
+ *   A = mxn-dim sparse matrix
+ *   n = number of variables
+ *   m = number of constraints
+ *
+ * 
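 *
 * A minimal population sketch (illustrative sizes and values; it assumes a valid
 * raft::handle_t and uses only the setters declared below, which accept host or
 * device pointers and copy the data on the handle's stream):
 *
 *   raft::handle_t handle;
 *   gpu_optimization_problem_t<int, double> problem(&handle);
 *   // minimize x0 + 2*x1  subject to  x0 + x1 <= 4,  x >= 0 (default lower bounds)
 *   double A_values[]  = {1.0, 1.0};
 *   int    A_indices[] = {0, 1};
 *   int    A_offsets[] = {0, 2};
 *   problem.set_csr_constraint_matrix(A_values, 2, A_indices, 2, A_offsets, 2);
 *   double b[] = {4.0};
 *   problem.set_constraint_bounds(b, 1);
 *   double c[] = {1.0, 2.0};
 *   problem.set_objective_coefficients(c, 2);
 *   char row_types[] = {'L'};
 *   problem.set_row_types(row_types, 1);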
+ * + * @note: By default this assumes objective minimization. + * + * Objective value can be scaled and offset accordingly: + * objective_scaling_factor * (dot(c, x) + objective_offset) + * please refer to the `set_objective_scaling_factor()` and + * `set_objective_offset()` methods. + */ +template +class gpu_optimization_problem_t { + public: + static_assert(std::is_integral::value, + "'gpu_optimization_problem_t' accepts only integer types for indexes"); + static_assert(std::is_floating_point::value, + "'gpu_optimization_problem_t' accepts only floating point types for weights"); + + /** + * @brief A device-side view of the `gpu_optimization_problem_t` structure with + * the RAII stuffs stripped out, to make it easy to work inside kernels + * + * @note It is assumed that the pointers are NOT owned by this class, but + * rather by the encompassing `gpu_optimization_problem_t` class via RAII + * abstractions like `rmm::device_uvector` + */ + struct view_t { + /** number of variables */ + i_t n_vars; + /** number of constraints in the LP representation */ + i_t n_constraints; + /** number of non-zero elements in the constraint matrix */ + i_t nnz; + /** + * constraint matrix in the CSR format + * @{ + */ + raft::device_span A; + raft::device_span A_indices; + raft::device_span A_offsets; + /** @} */ + /** RHS of the constraints */ + raft::device_span b; + /** array of weights used in the objective function */ + raft::device_span c; + /** array of lower bounds for the variables */ + raft::device_span variable_lower_bounds; + /** array of upper bounds for the variables */ + raft::device_span variable_upper_bounds; + /** variable types */ + raft::device_span variable_types; + /** array of lower bounds for the constraint */ + raft::device_span constraint_lower_bounds; + /** array of upper bounds for the constraint */ + raft::device_span constraint_upper_bounds; + }; // struct view_t + + gpu_optimization_problem_t(raft::handle_t const* handle_ptr); + gpu_optimization_problem_t(const gpu_optimization_problem_t& other); + + std::vector mip_callbacks_; + + /** + * @brief Set the sense of optimization to maximize. + * @note Setting before calling the solver is optional, default value if false + * (minimize). + * + * @param[in] maximize true means to maximize the objective function, else + * minimize. + */ + void set_maximize(bool maximize); + /** + * @brief Set the constraint matrix (A) in CSR format. For more information + * about CSR checkout: + * https://docs.nvidia.com/cuda/cusparse/index.html#compressed-sparse-row-csr + + * @note Setting before calling the solver is mandatory. + * + * @throws cuopt::logic_error when an error occurs. + * @param[in] A_values Values of the CSR representation of the constraint + * matrix as a device or host memory pointer to a floating point array of size + * size_values. + * cuOpt copies this data. Copy happens on the stream of the raft:handler + * passed to the problem. + * @param size_values Size of the A_values array. + * @param[in] A_indices Indices of the CSR representation of the constraint + * matrix as a device or host memory pointer to an integer array of size + * size_indices. + * cuOpt copies this data. Copy happens on the stream of the raft:handler + * passed to the problem. + * @param size_indices Size of the A_indices array. + * @param[in] A_offsets Offsets of the CSR representation of the constraint + * matrix as a device or host memory pointer to a integer array of size + * size_offsets. + * cuOpt copies this data. 
Copy happens on the stream of the raft:handler + * passed to the problem. + * @param size_offsets Size of the A_offsets array. + */ + void set_csr_constraint_matrix(const f_t* A_values, + i_t size_values, + const i_t* A_indices, + i_t size_indices, + const i_t* A_offsets, + i_t size_offsets); + + /** + * @brief Set the constraint bounds (b / right-hand side) array. + * @note Setting before calling the solver is mandatory. + * + * @param[in] b Device or host memory pointer to a floating point array of + * size size. cuOpt copies this data. Copy happens on the stream of the + * raft:handler passed to the problem. + * @param size Size of the b array. + */ + void set_constraint_bounds(const f_t* b, i_t size); + /** + * @brief Set the objective coefficients (c) array. + * @note Setting before calling the solver is mandatory. + * + * @param[in] c Device or host memory pointer to a floating point array of + * size size. cuOpt copies this data. Copy happens on the stream of the + * raft:handler passed to the problem. + * @param size Size of the c array. + */ + void set_objective_coefficients(const f_t* c, i_t size); + /** + * @brief Set the scaling factor of the objective function (scaling_factor * + * objective_value). + * @note Setting before calling the solver is optional, default value if 1. + * + * @param objective_scaling_factor Objective scaling factor value. + */ + void set_objective_scaling_factor(f_t objective_scaling_factor); + /** + * @brief Set the offset of the objective function (objective_offset + + * objective_value). + * @note Setting before calling the solver is optional, default value if 0. + * + * @param objective_offset Objective offset value. + */ + void set_objective_offset(f_t objective_offset); + /** + * @brief Set the variables (x) lower bounds. + * @note Setting before calling the solver is optional, default value for all + * is 0. + * + * @param[in] variable_lower_bounds Device or host memory pointer to a + * floating point array of size size. cuOpt copies this data. Copy happens on + * the stream of the raft:handler passed to the problem. + * @param size Size of the variable_lower_bounds array + */ + void set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size); + /** + * @brief Set the variables (x) upper bounds. + * @note Setting before calling the solver is optional, default value for all + * is +infinity. + * + * @param[in] variable_upper_bounds Device or host memory pointer to a + * floating point array of size size. cuOpt copies this data. Copy happens on + * the stream of the raft:handler passed to the problem. + * @param size Size of the variable_upper_bounds array. + */ + void set_variable_upper_bounds(const f_t* variable_upper_bounds, i_t size); + /** + * @brief Set the variables types. + * @note Setting before calling the solver is optional, default value for all + * is CONTINUOUS. + * + * @param[in] variable_types Device or host memory pointer to a var_t array. + * cuOpt copies this data. Copy happens on the stream of the raft:handler + * passed to the problem. + * @param size Size of the variable_types array. + */ + void set_variable_types(const var_t* variable_types, i_t size); + void set_problem_category(const problem_category_t& category); + /** + * @brief Set the constraints lower bounds. + * @note Setting before calling the solver is optional if you set the row + * type, else it's mandatory along with the upper bounds. + * + * @param[in] constraint_lower_bounds Device or host memory pointer to a + * floating point array of size size. 
cuOpt copies this data. Copy happens on + * the stream of the raft:handler passed to the problem. + * @param size Size of the constraint_lower_bounds array + */ + void set_constraint_lower_bounds(const f_t* constraint_lower_bounds, i_t size); + /** + * @brief Set the constraints upper bounds. + * @note Setting before calling the solver is optional if you set the row + * type, else it's mandatory along with the lower bounds. If both are set, + * priority goes to set_constraints. + * + * @param[in] constraint_upper_bounds Device or host memory pointer to a + * floating point array of size size. cuOpt copies this data. Copy happens on + * the stream of the raft:handler passed to the problem. + * @param size Size of the constraint_upper_bounds array + */ + void set_constraint_upper_bounds(const f_t* constraint_upper_bounds, i_t size); + + /** + * @brief Set the type of each row (constraint). Possible values are: + * 'E' for equality ( = ): lower & upper constrains bound equal to b + * 'L' for less-than ( <= ): lower constrains bound equal to -infinity, upper + * constrains bound equal to b 'G' for greater-than ( >= ): lower constrains + * bound equal to b, upper constrains bound equal to +infinity + * @note Setting before calling the solver is optional if you set the + * constraint lower and upper bounds, else it's mandatory If both are set, + * priority goes to set_constraints. + * + * @param[in] row_types Device or host memory pointer to a character array of + * size size. + * cuOpt copies this data. Copy happens on the stream of the raft:handler + * passed to the problem. + * @param size Size of the row_types array + */ + void set_row_types(const char* row_types, i_t size); + + /** + * @brief Set the name of the objective function. + * @note Setting before calling the solver is optional. Value is only used for + * file generation of the solution. + * + * @param[in] objective_name Objective name value. + */ + void set_objective_name(const std::string& objective_name); + /** + * @brief Set the problem name. + * @note Setting before calling the solver is optional. + * + * @param[in] problem_name Problem name value. + */ + void set_problem_name(const std::string& problem_name); + /** + * @brief Set the variables names. + * @note Setting before calling the solver is optional. Value is only used for + * file generation of the solution. + * + * @param[in] variable_names Variable names values. + */ + void set_variable_names(const std::vector& variables_names); + /** + * @brief Set the row names. + * @note Setting before calling the solver is optional. Value is only used for + * file generation of the solution. + * + * @param[in] row_names Row names value. 
+ */ + void set_row_names(const std::vector& row_names); + + /** + * @brief Write the problem to an MPS formatted file + * + * @param[in] mps_file_path Path to the MPS file to write + */ + void write_to_mps(const std::string& mps_file_path); + + /* Print scaling information */ + void print_scaling_information() const; + + i_t get_n_variables() const; + i_t get_n_constraints() const; + i_t get_nnz() const; + i_t get_n_integers() const; + raft::handle_t const* get_handle_ptr() const noexcept; + const rmm::device_uvector& get_constraint_matrix_values() const; + rmm::device_uvector& get_constraint_matrix_values(); + const rmm::device_uvector& get_constraint_matrix_indices() const; + rmm::device_uvector& get_constraint_matrix_indices(); + const rmm::device_uvector& get_constraint_matrix_offsets() const; + rmm::device_uvector& get_constraint_matrix_offsets(); + const rmm::device_uvector& get_constraint_bounds() const; + rmm::device_uvector& get_constraint_bounds(); + const rmm::device_uvector& get_objective_coefficients() const; + rmm::device_uvector& get_objective_coefficients(); + f_t get_objective_scaling_factor() const; + f_t get_objective_offset() const; + const rmm::device_uvector& get_variable_lower_bounds() const; + const rmm::device_uvector& get_variable_upper_bounds() const; + rmm::device_uvector& get_variable_lower_bounds(); + rmm::device_uvector& get_variable_upper_bounds(); + const rmm::device_uvector& get_constraint_lower_bounds() const; + const rmm::device_uvector& get_constraint_upper_bounds() const; + rmm::device_uvector& get_constraint_lower_bounds(); + rmm::device_uvector& get_constraint_upper_bounds(); + const rmm::device_uvector& get_row_types() const; + const rmm::device_uvector& get_variable_types() const; + bool get_sense() const; + bool empty() const; + + std::string get_objective_name() const; + std::string get_problem_name() const; + // Unless an integer variable is added, by default it is LP + problem_category_t get_problem_category() const; + const std::vector& get_variable_names() const; + const std::vector& get_row_names() const; + + /** + * @brief Gets the device-side view (with raw pointers), for ease of access + * inside cuda kernels + */ + view_t view() const; + + private: + void add_row_related_vars_to_row(std::vector& indices, + std::vector& values, + std::vector& A_offsets, + std::vector& A_indices, + std::vector& A_values); + + // Pointer to library handle (RAFT) containing hardware resources information + raft::handle_t const* handle_ptr_{nullptr}; + rmm::cuda_stream_view stream_view_; + + /** problem classification */ + problem_category_t problem_category_; + /** whether to maximize or minimize the objective function */ + bool maximize_; + /** number of variables */ + i_t n_vars_; + /** number of constraints in the LP representation */ + i_t n_constraints_; + /** + * the constraint matrix itself in the CSR format (GPU) + * @{ + */ + rmm::device_uvector A_; + rmm::device_uvector A_indices_; + rmm::device_uvector A_offsets_; + /** @} */ + /** RHS of the constraints (GPU) */ + rmm::device_uvector b_; + /** weights in the objective function (GPU) */ + rmm::device_uvector c_; + /** scale factor of the objective function */ + f_t objective_scaling_factor_{1}; + /** offset of the objective function */ + f_t objective_offset_{0}; + /** lower bounds of the variables (primal part, GPU) */ + rmm::device_uvector variable_lower_bounds_; + /** upper bounds of the variables (primal part, GPU) */ + rmm::device_uvector variable_upper_bounds_; + /** lower bounds of the 
constraint (dual part, GPU) */ + rmm::device_uvector constraint_lower_bounds_; + /** upper bounds of the constraint (dual part, GPU) */ + rmm::device_uvector constraint_upper_bounds_; + /** Type of each constraint (GPU) */ + rmm::device_uvector row_types_; + /** Type of each variable (GPU) */ + rmm::device_uvector variable_types_; + /** name of the objective (only a single objective is currently allowed) */ + std::string objective_name_; + /** name of the problem */ + std::string problem_name_; + /** names of each of the variables in the OP */ + std::vector var_names_{}; + /** names of each of the rows (aka constraints or objective) in the OP */ + std::vector row_names_{}; +}; // class gpu_optimization_problem_t + +} // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp index fc98e9aff..69bfd9bc9 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp @@ -22,9 +22,6 @@ #include #include -#include -#include - #include #include @@ -45,7 +42,7 @@ enum class mip_termination_status_t : int8_t { template class mip_solution_t : public base_solution_t { public: - mip_solution_t(rmm::device_uvector solution, + mip_solution_t(std::vector solution, std::vector var_names, f_t objective, f_t mip_gap, @@ -54,16 +51,14 @@ class mip_solution_t : public base_solution_t { f_t max_int_violation, f_t max_variable_bound_violation, solver_stats_t stats, - std::vector> solution_pool = {}); + std::vector> solution_pool = {}); - mip_solution_t(mip_termination_status_t termination_status, - solver_stats_t stats, - rmm::cuda_stream_view stream_view); - mip_solution_t(const cuopt::logic_error& error_status, rmm::cuda_stream_view stream_view); + mip_solution_t(mip_termination_status_t termination_status, solver_stats_t stats); + mip_solution_t(const cuopt::logic_error& error_status); bool is_mip() const override { return true; } - const rmm::device_uvector& get_solution() const; - rmm::device_uvector& get_solution(); + const std::vector& get_solution() const; + std::vector& get_solution(); f_t get_objective_value() const; f_t get_mip_gap() const; @@ -81,12 +76,13 @@ class mip_solution_t : public base_solution_t { i_t get_num_nodes() const; i_t get_num_simplex_iterations() const; const std::vector& get_variable_names() const; - const std::vector>& get_solution_pool() const; - void write_to_sol_file(std::string_view filename, rmm::cuda_stream_view stream_view) const; + const std::vector>& get_solution_pool() const; + void write_to_sol_file(std::string_view filename) const; void log_summary() const; + void print_solution_stats() const; private: - rmm::device_uvector solution_; + std::vector solution_; std::vector var_names_; f_t objective_; f_t mip_gap_; @@ -96,7 +92,7 @@ class mip_solution_t : public base_solution_t { f_t max_int_violation_; f_t max_variable_bound_violation_; solver_stats_t stats_; - std::vector> solution_pool_; + std::vector> solution_pool_; }; } // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/optimization_problem.hpp b/cpp/include/cuopt/linear_programming/optimization_problem.hpp index 6f8f95973..08b43fd91 100644 --- a/cpp/include/cuopt/linear_programming/optimization_problem.hpp +++ b/cpp/include/cuopt/linear_programming/optimization_problem.hpp @@ -34,8 +34,11 @@ namespace cuopt::linear_programming { +#ifndef CUOPT_VAR_T_DEFINED +#define 
CUOPT_VAR_T_DEFINED enum class var_t { CONTINUOUS = 0, INTEGER }; enum class problem_category_t : int8_t { LP = 0, MIP = 1, IP = 2 }; +#endif /** * @brief A representation of a linear programming (LP) optimization problem @@ -112,7 +115,7 @@ class optimization_problem_t { raft::device_span constraint_upper_bounds; }; // struct view_t - optimization_problem_t(raft::handle_t const* handle_ptr); + optimization_problem_t(); optimization_problem_t(const optimization_problem_t& other); std::vector mip_callbacks_; @@ -318,29 +321,28 @@ class optimization_problem_t { i_t get_n_constraints() const; i_t get_nnz() const; i_t get_n_integers() const; - raft::handle_t const* get_handle_ptr() const noexcept; - const rmm::device_uvector& get_constraint_matrix_values() const; - rmm::device_uvector& get_constraint_matrix_values(); - const rmm::device_uvector& get_constraint_matrix_indices() const; - rmm::device_uvector& get_constraint_matrix_indices(); - const rmm::device_uvector& get_constraint_matrix_offsets() const; - rmm::device_uvector& get_constraint_matrix_offsets(); - const rmm::device_uvector& get_constraint_bounds() const; - rmm::device_uvector& get_constraint_bounds(); - const rmm::device_uvector& get_objective_coefficients() const; - rmm::device_uvector& get_objective_coefficients(); + const std::vector& get_constraint_matrix_values() const; + std::vector& get_constraint_matrix_values(); + const std::vector& get_constraint_matrix_indices() const; + std::vector& get_constraint_matrix_indices(); + const std::vector& get_constraint_matrix_offsets() const; + std::vector& get_constraint_matrix_offsets(); + const std::vector& get_constraint_bounds() const; + std::vector& get_constraint_bounds(); + const std::vector& get_objective_coefficients() const; + std::vector& get_objective_coefficients(); f_t get_objective_scaling_factor() const; f_t get_objective_offset() const; - const rmm::device_uvector& get_variable_lower_bounds() const; - const rmm::device_uvector& get_variable_upper_bounds() const; - rmm::device_uvector& get_variable_lower_bounds(); - rmm::device_uvector& get_variable_upper_bounds(); - const rmm::device_uvector& get_constraint_lower_bounds() const; - const rmm::device_uvector& get_constraint_upper_bounds() const; - rmm::device_uvector& get_constraint_lower_bounds(); - rmm::device_uvector& get_constraint_upper_bounds(); - const rmm::device_uvector& get_row_types() const; - const rmm::device_uvector& get_variable_types() const; + const std::vector& get_variable_lower_bounds() const; + const std::vector& get_variable_upper_bounds() const; + std::vector& get_variable_lower_bounds(); + std::vector& get_variable_upper_bounds(); + const std::vector& get_constraint_lower_bounds() const; + const std::vector& get_constraint_upper_bounds() const; + std::vector& get_constraint_lower_bounds(); + std::vector& get_constraint_upper_bounds(); + const std::vector& get_row_types() const; + const std::vector& get_variable_types() const; bool get_sense() const; bool empty() const; @@ -364,10 +366,6 @@ class optimization_problem_t { std::vector& A_indices, std::vector& A_values); - // Pointer to library handle (RAFT) containing hardware resources information - raft::handle_t const* handle_ptr_{nullptr}; - rmm::cuda_stream_view stream_view_; - /** problem classification */ problem_category_t problem_category_ = problem_category_t::LP; /** whether to maximize or minimize the objective function */ @@ -377,33 +375,33 @@ class optimization_problem_t { /** number of constraints in the LP representation */ i_t 
n_constraints_; /** - * the constraint matrix itself in the CSR format + * the constraint matrix itself in the CSR format (HOST memory) * @{ */ - rmm::device_uvector A_; - rmm::device_uvector A_indices_; - rmm::device_uvector A_offsets_; + std::vector A_; + std::vector A_indices_; + std::vector A_offsets_; /** @} */ - /** RHS of the constraints */ - rmm::device_uvector b_; - /** weights in the objective function */ - rmm::device_uvector c_; + /** RHS of the constraints (HOST memory) */ + std::vector b_; + /** weights in the objective function (HOST memory) */ + std::vector c_; /** scale factor of the objective function */ f_t objective_scaling_factor_{1}; /** offset of the objective function */ f_t objective_offset_{0}; - /** lower bounds of the variables (primal part) */ - rmm::device_uvector variable_lower_bounds_; - /** upper bounds of the variables (primal part) */ - rmm::device_uvector variable_upper_bounds_; - /** lower bounds of the constraint (dual part) */ - rmm::device_uvector constraint_lower_bounds_; - /** upper bounds of the constraint (dual part) */ - rmm::device_uvector constraint_upper_bounds_; - /** Type of each constraint */ - rmm::device_uvector row_types_; - /** Type of each variable */ - rmm::device_uvector variable_types_; + /** lower bounds of the variables (primal part, HOST memory) */ + std::vector variable_lower_bounds_; + /** upper bounds of the variables (primal part, HOST memory) */ + std::vector variable_upper_bounds_; + /** lower bounds of the constraint (dual part, HOST memory) */ + std::vector constraint_lower_bounds_; + /** upper bounds of the constraint (dual part, HOST memory) */ + std::vector constraint_upper_bounds_; + /** Type of each constraint (HOST memory) */ + std::vector row_types_; + /** Type of each variable (HOST memory) */ + std::vector variable_types_; /** name of the objective (only a single objective is currently allowed) */ std::string objective_name_; /** name of the problem */ diff --git a/cpp/include/cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp b/cpp/include/cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp index d897c7cf4..4039476c8 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp @@ -29,18 +29,18 @@ struct pdlp_warm_start_data_view_t; // Holds everything necessary to warm start PDLP template struct pdlp_warm_start_data_t { - rmm::device_uvector + std::vector current_primal_solution_; // Can't just be pulled from solution object as we might return the // average as solution while we want to continue on optimize on the // current, no the average - rmm::device_uvector current_dual_solution_; // Same as above - rmm::device_uvector initial_primal_average_; // Same as above but if current is returned - rmm::device_uvector initial_dual_average_; // Same as above - rmm::device_uvector current_ATY_; - rmm::device_uvector sum_primal_solutions_; - rmm::device_uvector sum_dual_solutions_; - rmm::device_uvector last_restart_duality_gap_primal_solution_; - rmm::device_uvector last_restart_duality_gap_dual_solution_; + std::vector current_dual_solution_; // Same as above + std::vector initial_primal_average_; // Same as above but if current is returned + std::vector initial_dual_average_; // Same as above + std::vector current_ATY_; + std::vector sum_primal_solutions_; + std::vector sum_dual_solutions_; + std::vector last_restart_duality_gap_primal_solution_; + std::vector last_restart_duality_gap_dual_solution_; f_t 
initial_primal_weight_{-1}; f_t initial_step_size_{-1}; i_t total_pdlp_iterations_{-1}; @@ -51,15 +51,15 @@ struct pdlp_warm_start_data_t { i_t iterations_since_last_restart_{-1}; // Constructor when building it in the solution object - pdlp_warm_start_data_t(rmm::device_uvector& current_primal_solution, - rmm::device_uvector& current_dual_solution, - rmm::device_uvector& initial_primal_average, - rmm::device_uvector& initial_dual_average, - rmm::device_uvector& current_ATY, - rmm::device_uvector& sum_primal_solutions, - rmm::device_uvector& sum_dual_solutions, - rmm::device_uvector& last_restart_duality_gap_primal_solution, - rmm::device_uvector& last_restart_duality_gap_dual_solution, + pdlp_warm_start_data_t(std::vector current_primal_solution, + std::vector current_dual_solution, + std::vector initial_primal_average, + std::vector initial_dual_average, + std::vector current_ATY, + std::vector sum_primal_solutions, + std::vector sum_dual_solutions, + std::vector last_restart_duality_gap_primal_solution, + std::vector last_restart_duality_gap_dual_solution, f_t initial_primal_weight, f_t initial_step_size, i_t total_pdlp_iterations, @@ -73,12 +73,10 @@ struct pdlp_warm_start_data_t { pdlp_warm_start_data_t(); // Copy constructor using the view version for the cython_solver - pdlp_warm_start_data_t(const pdlp_warm_start_data_view_t& other, - rmm::cuda_stream_view stream_view); + pdlp_warm_start_data_t(const pdlp_warm_start_data_view_t& other); // Copy constructor for when copying the solver_settings object in the PDLP object - pdlp_warm_start_data_t(const pdlp_warm_start_data_t& other, - rmm::cuda_stream_view stream_view); + pdlp_warm_start_data_t(const pdlp_warm_start_data_t& other); private: // Check sizes through assertion diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp index 11e1ec5bc..a0aeee4e4 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp @@ -108,10 +108,8 @@ class optimization_problem_solution_t : public base_solution_t { * * @param[in] termination_status_ Reason for termination. Possible values are : 'NumericalError' * 'Optimal', 'PrimalInfeasible', 'DualInfeasible', 'TimeLimit' - * @param[in] stream_view An rmm view to a stream. All computations will go through this stream */ - optimization_problem_solution_t(pdlp_termination_status_t termination_status_, - rmm::cuda_stream_view stream_view); + optimization_problem_solution_t(pdlp_termination_status_t termination_status_); /** * @brief Construct an optimization problem solution that serves as PDLP solver output @@ -119,10 +117,8 @@ class optimization_problem_solution_t : public base_solution_t { * * @param[in] error_status_ The error object, containing info about what went wrong * 'Optimal', 'PrimalInfeasible', 'DualInfeasible', 'TimeLimit' - * @param[in] stream_view An rmm view to a stream. 
All computations will go through this stream */ - optimization_problem_solution_t(cuopt::logic_error error_status_, - rmm::cuda_stream_view stream_view); + optimization_problem_solution_t(cuopt::logic_error error_status_); /** * @brief Construct an optimization problem solution that serves as PDLP solver output * @@ -135,23 +131,23 @@ class optimization_problem_solution_t : public base_solution_t { * @param[in] termination_stats The termination statistics * @param[in] termination_status_ The termination reason */ - optimization_problem_solution_t(rmm::device_uvector& final_primal_solution, - rmm::device_uvector& final_dual_solution, - rmm::device_uvector& final_reduced_cost, - pdlp_warm_start_data_t& warm_start_data, + optimization_problem_solution_t(std::vector final_primal_solution, + std::vector final_dual_solution, + std::vector final_reduced_cost, + pdlp_warm_start_data_t warm_start_data, const std::string objective_name, const std::vector& var_names, const std::vector& row_names, - additional_termination_information_t& termination_stats, + additional_termination_information_t termination_stats, pdlp_termination_status_t termination_status_); - optimization_problem_solution_t(rmm::device_uvector& final_primal_solution, - rmm::device_uvector& final_dual_solution, - rmm::device_uvector& final_reduced_cost, + optimization_problem_solution_t(std::vector final_primal_solution, + std::vector final_dual_solution, + std::vector final_reduced_cost, const std::string objective_name, const std::vector& var_names, const std::vector& row_names, - additional_termination_information_t& termination_stats, + additional_termination_information_t termination_stats, pdlp_termination_status_t termination_status_); /** @@ -167,15 +163,14 @@ class optimization_problem_solution_t : public base_solution_t { * @param[in] termination_stats The termination statistics * @param[in] termination_status_ The termination reason */ - optimization_problem_solution_t(rmm::device_uvector& final_primal_solution, - rmm::device_uvector& final_dual_solution, - rmm::device_uvector& final_reduced_cost, + optimization_problem_solution_t(const std::vector& final_primal_solution, + const std::vector& final_dual_solution, + const std::vector& final_reduced_cost, const std::string objective_name, const std::vector& var_names, const std::vector& row_names, - additional_termination_information_t& termination_stats, + additional_termination_information_t termination_stats, pdlp_termination_status_t termination_status, - const raft::handle_t* handler_ptr, bool deep_copy); /** @@ -223,26 +218,26 @@ class optimization_problem_solution_t : public base_solution_t { /** * @brief Returns the solution for the values of the primal variables as a vector of `f_t`. * - * @return rmm::device_uvector The device memory container for the primal solution. + * @return std::vector The host memory container for the primal solution. */ - rmm::device_uvector& get_primal_solution(); - const rmm::device_uvector& get_primal_solution() const; + std::vector& get_primal_solution(); + const std::vector& get_primal_solution() const; /** * @brief Returns the solution for the values of the dual variables as a vector of `f_t`. * - * @return rmm::device_uvector The device memory container for the dual solution. + * @return std::vector The host memory container for the dual solution. 
*/ - rmm::device_uvector& get_dual_solution(); - const rmm::device_uvector& get_dual_solution() const; + std::vector& get_dual_solution(); + const std::vector& get_dual_solution() const; /** * @brief Returns the reduced cost as a vector of `f_t`. The reduced cost contains the dual * multipliers for the linear constraints. * - * @return rmm::device_uvector The device memory container for the reduced cost. + * @return std::vector The host memory container for the reduced cost. */ - rmm::device_uvector& get_reduced_cost(); + std::vector& get_reduced_cost(); /** * @brief Get termination reason @@ -266,23 +261,18 @@ class optimization_problem_solution_t : public base_solution_t { pdlp_warm_start_data_t& get_pdlp_warm_start_data(); /** - * @brief Writes the solver_solution object as a JSON object to the 'filename' file using - * 'stream_view' to transfer the data from device to host before it is written to the file. + * @brief Writes the solver_solution object as a JSON object to the 'filename' file. + * Solution is already on host memory. * @param filename Name of the output file - * @param stream_view Non-owning stream view object */ - void write_to_file(std::string_view filename, - rmm::cuda_stream_view stream_view, - bool generate_variable_values = true); + void write_to_file(std::string_view filename, bool generate_variable_values = true); /** * @brief Writes the solver_solution object as a '.sol' file as supported by other solvers and - * used in MIPLIB using 'stream_view' to transfer the data from device to host before it is - * written to the file. + * used in MIPLIB. Solution is already on host memory. * @param filename Name of the output file - * @param stream_view Non-owning stream view object */ - void write_to_sol_file(std::string_view filename, rmm::cuda_stream_view stream_view) const; + void write_to_sol_file(std::string_view filename) const; /** * @brief Copy solution from another solution object @@ -292,12 +282,17 @@ class optimization_problem_solution_t : public base_solution_t { void copy_from(const raft::handle_t* handle_ptr, const optimization_problem_solution_t& other); + /** + * @brief Print solution statistics in compact single-line format + */ + void print_solution_stats() const; + private: void write_additional_termination_statistics_to_file(std::ofstream& myfile); - rmm::device_uvector primal_solution_; - rmm::device_uvector dual_solution_; - rmm::device_uvector reduced_cost_; + std::vector primal_solution_; + std::vector dual_solution_; + std::vector reduced_cost_; pdlp_warm_start_data_t pdlp_warm_start_data_; pdlp_termination_status_t termination_status_; diff --git a/cpp/include/cuopt/linear_programming/solve.hpp b/cpp/include/cuopt/linear_programming/solve.hpp index 11e8f9dcf..e5b0b7c1c 100644 --- a/cpp/include/cuopt/linear_programming/solve.hpp +++ b/cpp/include/cuopt/linear_programming/solve.hpp @@ -81,6 +81,16 @@ optimization_problem_solution_t solve_lp( bool problem_checking = true, bool use_pdlp_solver_mode = true); +/** + * @brief Linear programming solve function without handle + */ +template +optimization_problem_solution_t solve_lp( + const cuopt::mps_parser::mps_data_model_t& mps_data_model, + pdlp_solver_settings_t const& settings = pdlp_solver_settings_t{}, + bool problem_checking = true, + bool use_pdlp_solver_mode = true); + /** * @brief Mixed integer programming solve function. 
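The handle-free solve_lp overload declared above can be driven entirely from host data. A minimal sketch (here `mps_data` stands in for an already-parsed cuopt::mps_parser::mps_data_model_t<int, double>; the settings object and default flags come from the declaration):

    namespace lp = cuopt::linear_programming;
    lp::pdlp_solver_settings_t<int, double> settings;
    auto solution = lp::solve_lp<int, double>(mps_data, settings);
    auto status   = solution.get_termination_status();
    const std::vector<double>& x = solution.get_primal_solution();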
* @@ -108,13 +118,11 @@ mip_solution_t solve_mip( */ template mip_solution_t solve_mip( - raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& mps_data_model, mip_solver_settings_t const& settings = mip_solver_settings_t{}); template optimization_problem_t mps_data_model_to_optimization_problem( - raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& data_model); } // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp index 46d672cb1..32f507786 100644 --- a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp @@ -34,20 +34,21 @@ namespace cython { // aggregate for call_solve() return type // to be exposed to cython: +// NOTE: Uses host memory (std::vector) to support remote solving without GPU struct linear_programming_ret_t { - std::unique_ptr primal_solution_; - std::unique_ptr dual_solution_; - std::unique_ptr reduced_cost_; + std::vector primal_solution_; + std::vector dual_solution_; + std::vector reduced_cost_; /* -- PDLP Warm Start Data -- */ - std::unique_ptr current_primal_solution_; - std::unique_ptr current_dual_solution_; - std::unique_ptr initial_primal_average_; - std::unique_ptr initial_dual_average_; - std::unique_ptr current_ATY_; - std::unique_ptr sum_primal_solutions_; - std::unique_ptr sum_dual_solutions_; - std::unique_ptr last_restart_duality_gap_primal_solution_; - std::unique_ptr last_restart_duality_gap_dual_solution_; + std::vector current_primal_solution_; + std::vector current_dual_solution_; + std::vector initial_primal_average_; + std::vector initial_dual_average_; + std::vector current_ATY_; + std::vector sum_primal_solutions_; + std::vector sum_dual_solutions_; + std::vector last_restart_duality_gap_primal_solution_; + std::vector last_restart_duality_gap_dual_solution_; double initial_primal_weight_; double initial_step_size_; int total_pdlp_iterations_; @@ -74,7 +75,7 @@ struct linear_programming_ret_t { }; struct mip_ret_t { - std::unique_ptr solution_; + std::vector solution_; linear_programming::mip_termination_status_t termination_status_; error_type_t error_status_; diff --git a/cpp/include/cuopt/linear_programming/utilities/problem_conversion.cuh b/cpp/include/cuopt/linear_programming/utilities/problem_conversion.cuh new file mode 100644 index 000000000..c97bc5be9 --- /dev/null +++ b/cpp/include/cuopt/linear_programming/utilities/problem_conversion.cuh @@ -0,0 +1,105 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Convert host-based optimization_problem_t to GPU-based gpu_optimization_problem_t + * + * This function performs the actual host-to-device memory copies for local solving. + * For remote solving, this conversion is skipped entirely. + * + * @tparam i_t Integer type for indices + * @tparam f_t Floating point type for values + * @param handle_ptr RAFT handle for stream and device context + * @param host_problem The problem with data in host memory (std::vector) + * @return gpu_optimization_problem_t The problem with data in GPU memory (rmm::device_uvector) + */ +template +gpu_optimization_problem_t host_to_gpu_problem( + raft::handle_t const* handle_ptr, const optimization_problem_t& host_problem) +{ + raft::common::nvtx::range fun_scope("host_to_gpu_problem"); + + // Create GPU problem + gpu_optimization_problem_t gpu_problem(handle_ptr); + + // Copy basic properties + gpu_problem.set_maximize(host_problem.get_sense()); + gpu_problem.set_objective_scaling_factor(host_problem.get_objective_scaling_factor()); + gpu_problem.set_objective_offset(host_problem.get_objective_offset()); + gpu_problem.set_problem_category(host_problem.get_problem_category()); + gpu_problem.set_objective_name(host_problem.get_objective_name()); + gpu_problem.set_problem_name(host_problem.get_problem_name()); + gpu_problem.set_variable_names(host_problem.get_variable_names()); + gpu_problem.set_row_names(host_problem.get_row_names()); + + // Copy constraint matrix (CSR format) + const auto& A_values = host_problem.get_constraint_matrix_values(); + const auto& A_indices = host_problem.get_constraint_matrix_indices(); + const auto& A_offsets = host_problem.get_constraint_matrix_offsets(); + if (!A_values.empty()) { + gpu_problem.set_csr_constraint_matrix(A_values.data(), + A_values.size(), + A_indices.data(), + A_indices.size(), + A_offsets.data(), + A_offsets.size()); + } + + // Copy objective coefficients + const auto& c = host_problem.get_objective_coefficients(); + if (!c.empty()) { gpu_problem.set_objective_coefficients(c.data(), c.size()); } + + // Copy constraint bounds + const auto& b = host_problem.get_constraint_bounds(); + if (!b.empty()) { gpu_problem.set_constraint_bounds(b.data(), b.size()); } + + // Copy variable bounds + const auto& var_lb = host_problem.get_variable_lower_bounds(); + if (!var_lb.empty()) { gpu_problem.set_variable_lower_bounds(var_lb.data(), var_lb.size()); } + + const auto& var_ub = host_problem.get_variable_upper_bounds(); + if (!var_ub.empty()) { gpu_problem.set_variable_upper_bounds(var_ub.data(), var_ub.size()); } + + // Copy constraint bounds + const auto& con_lb = host_problem.get_constraint_lower_bounds(); + if (!con_lb.empty()) { gpu_problem.set_constraint_lower_bounds(con_lb.data(), con_lb.size()); } + + const auto& con_ub = host_problem.get_constraint_upper_bounds(); + if (!con_ub.empty()) { gpu_problem.set_constraint_upper_bounds(con_ub.data(), con_ub.size()); } + + // Copy row types + const auto& row_types = host_problem.get_row_types(); + if (!row_types.empty()) { gpu_problem.set_row_types(row_types.data(), row_types.size()); } + + // Copy variable types + const auto& var_types = host_problem.get_variable_types(); + if (!var_types.empty()) { gpu_problem.set_variable_types(var_types.data(), var_types.size()); } + + return gpu_problem; +} + +} // namespace cuopt::linear_programming diff --git 
a/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp new file mode 100644 index 000000000..1511a81d9 --- /dev/null +++ b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp @@ -0,0 +1,76 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Check if remote solve is enabled via environment variables + * + * Checks for CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT environment variables. + * + * @param[out] host Pointer to store the host string (or nullptr if not set) + * @param[out] port Pointer to store the port string (or nullptr if not set) + * @return true if both environment variables are set + */ +bool is_remote_solve_enabled(const char** host, const char** port); + +/** + * @brief Solve LP problem on remote server using Protocol Buffers + * + * Reads CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT from environment, + * serializes the problem and settings using Protocol Buffers, + * sends to remote server via TCP, and deserializes the solution. + * + * @tparam i_t Integer type for indices + * @tparam f_t Float type for values + * @param problem The optimization problem (host memory) + * @param settings Solver settings + * @return Solution from remote server + * @throws std::runtime_error if remote solve is not enabled or connection fails + */ +template +optimization_problem_solution_t solve_lp_remote( + const optimization_problem_t& problem, + const pdlp_solver_settings_t& settings); + +/** + * @brief Solve MIP problem on remote server using Protocol Buffers + * + * Reads CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT from environment, + * serializes the problem and settings using Protocol Buffers, + * sends to remote server via TCP, and deserializes the solution. 
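 *
 * A rough usage sketch; only the environment variable names, is_remote_solve_enabled()
 * and the *_remote() entry points come from this header, while the surrounding
 * variables (`problem`, `mip_settings`) are illustrative:
 *
 *   setenv("CUOPT_REMOTE_HOST", "10.0.0.5", 1);   // illustrative address
 *   setenv("CUOPT_REMOTE_PORT", "9090", 1);       // illustrative port
 *   const char* host = nullptr;
 *   const char* port = nullptr;
 *   if (is_remote_solve_enabled(&host, &port)) {
 *     // problem: populated optimization_problem_t<int, double> (host memory)
 *     // mip_settings: mip_solver_settings_t<int, double>
 *     auto mip_solution = solve_mip_remote<int, double>(problem, mip_settings);
 *   }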
+ * + * @tparam i_t Integer type for indices + * @tparam f_t Float type for values + * @param problem The optimization problem (host memory) + * @param settings Solver settings + * @return Solution from remote server + * @throws std::runtime_error if remote solve is not enabled or connection fails + */ +template +mip_solution_t solve_mip_remote(const optimization_problem_t& problem, + const mip_solver_settings_t& settings); + +} // namespace cuopt::linear_programming diff --git a/cpp/src/dual_simplex/barrier.cu b/cpp/src/dual_simplex/barrier.cu index 47f1218f3..40daf1696 100644 --- a/cpp/src/dual_simplex/barrier.cu +++ b/cpp/src/dual_simplex/barrier.cu @@ -3575,6 +3575,9 @@ lp_status_t barrier_solver_t::solve(f_t start_time, } catch (const raft::cuda_error& e) { settings.log.debug("Error in barrier_solver_t: %s\n", e.what()); return lp_status_t::NUMERICAL_ISSUES; + } catch (const rmm::out_of_memory& e) { + settings.log.debug("Out of memory in barrier_solver_t: %s\n", e.what()); + return lp_status_t::NUMERICAL_ISSUES; } } diff --git a/cpp/src/dual_simplex/cusparse_view.cu b/cpp/src/dual_simplex/cusparse_view.cu index 8d2260473..0ae9ea9bc 100644 --- a/cpp/src/dual_simplex/cusparse_view.cu +++ b/cpp/src/dual_simplex/cusparse_view.cu @@ -138,6 +138,10 @@ cusparse_view_t::cusparse_view_t(raft::handle_t const* handle_ptr, d_minus_one_(f_t(-1), handle_ptr->get_stream()), d_zero_(f_t(0), handle_ptr->get_stream()) { + RAFT_CUBLAS_TRY(raft::linalg::detail::cublassetpointermode( + handle_ptr->get_cublas_handle(), CUBLAS_POINTER_MODE_DEVICE, handle_ptr->get_stream())); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsesetpointermode( + handle_ptr->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream())); // TMP matrix data should already be on the GPU constexpr bool debug = false; if (debug) { printf("A hash: %zu\n", A.hash()); } diff --git a/cpp/src/linear_programming/CMakeLists.txt b/cpp/src/linear_programming/CMakeLists.txt index e2deef2e4..053e29715 100644 --- a/cpp/src/linear_programming/CMakeLists.txt +++ b/cpp/src/linear_programming/CMakeLists.txt @@ -17,7 +17,9 @@ set(LP_CORE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/solver_settings.cu ${CMAKE_CURRENT_SOURCE_DIR}/optimization_problem.cu + ${CMAKE_CURRENT_SOURCE_DIR}/gpu_optimization_problem.cu ${CMAKE_CURRENT_SOURCE_DIR}/utilities/problem_checking.cu + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/remote_solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/pdlp.cu ${CMAKE_CURRENT_SOURCE_DIR}/pdhg.cu diff --git a/cpp/src/linear_programming/cuopt_c.cpp b/cpp/src/linear_programming/cuopt_c.cpp index df1fa1a4b..79569e583 100644 --- a/cpp/src/linear_programming/cuopt_c.cpp +++ b/cpp/src/linear_programming/cuopt_c.cpp @@ -95,8 +95,8 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro } } optimization_problem_t* op_problem = - new optimization_problem_t(mps_data_model_to_optimization_problem( - problem_and_stream->get_handle_ptr(), *mps_data_model_ptr)); + new optimization_problem_t( + mps_data_model_to_optimization_problem(*mps_data_model_ptr)); problem_and_stream->op_problem = op_problem; *problem_ptr = static_cast(problem_and_stream); return CUOPT_SUCCESS; @@ -128,8 +128,7 @@ cuopt_int_t cuOptCreateProblem(cuopt_int_t num_constraints, } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + problem_and_stream->op_problem = new optimization_problem_t(); try { 
problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); problem_and_stream->op_problem->set_objective_offset(objective_offset); @@ -185,8 +184,7 @@ cuopt_int_t cuOptCreateRangedProblem(cuopt_int_t num_constraints, } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + problem_and_stream->op_problem = new optimization_problem_t(); try { problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); problem_and_stream->op_problem->set_objective_offset(objective_offset); @@ -277,13 +275,10 @@ cuopt_int_t cuOptGetObjectiveCoefficients(cuOptOptimizationProblem problem, if (objective_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& objective_coefficients = + const std::vector& objective_coefficients = problem_and_stream_view->op_problem->get_objective_coefficients(); - raft::copy(objective_coefficients_ptr, - objective_coefficients.data(), - objective_coefficients.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + std::copy( + objective_coefficients.begin(), objective_coefficients.end(), objective_coefficients_ptr); return CUOPT_SUCCESS; } @@ -309,25 +304,21 @@ cuopt_int_t cuOptGetConstraintMatrix(cuOptOptimizationProblem problem, if (constraint_matrix_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& constraint_matrix_coefficients = + const std::vector& constraint_matrix_coefficients = problem_and_stream_view->op_problem->get_constraint_matrix_values(); - const rmm::device_uvector& constraint_matrix_column_indices = + const std::vector& constraint_matrix_column_indices = problem_and_stream_view->op_problem->get_constraint_matrix_indices(); - const rmm::device_uvector& constraint_matrix_row_offsets = + const std::vector& constraint_matrix_row_offsets = problem_and_stream_view->op_problem->get_constraint_matrix_offsets(); - raft::copy(constraint_matrix_coefficients_ptr, - constraint_matrix_coefficients.data(), - constraint_matrix_coefficients.size(), - problem_and_stream_view->stream_view); - raft::copy(constraint_matrix_column_indices_ptr, - constraint_matrix_column_indices.data(), - constraint_matrix_column_indices.size(), - problem_and_stream_view->stream_view); - raft::copy(constraint_matrix_row_offsets_ptr, - constraint_matrix_row_offsets.data(), - constraint_matrix_row_offsets.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + std::copy(constraint_matrix_coefficients.begin(), + constraint_matrix_coefficients.end(), + constraint_matrix_coefficients_ptr); + std::copy(constraint_matrix_column_indices.begin(), + constraint_matrix_column_indices.end(), + constraint_matrix_column_indices_ptr); + std::copy(constraint_matrix_row_offsets.begin(), + constraint_matrix_row_offsets.end(), + constraint_matrix_row_offsets_ptr); return CUOPT_SUCCESS; } @@ -337,13 +328,8 @@ cuopt_int_t cuOptGetConstraintSense(cuOptOptimizationProblem problem, char* cons if (constraint_sense_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& constraint_sense = - 
problem_and_stream_view->op_problem->get_row_types(); - raft::copy(constraint_sense_ptr, - constraint_sense.data(), - constraint_sense.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + const std::vector& constraint_sense = problem_and_stream_view->op_problem->get_row_types(); + std::copy(constraint_sense.begin(), constraint_sense.end(), constraint_sense_ptr); return CUOPT_SUCCESS; } @@ -354,10 +340,9 @@ cuopt_int_t cuOptGetConstraintRightHandSide(cuOptOptimizationProblem problem, if (rhs_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& rhs = + const std::vector& rhs = problem_and_stream_view->op_problem->get_constraint_bounds(); - raft::copy(rhs_ptr, rhs.data(), rhs.size(), problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + std::copy(rhs.begin(), rhs.end(), rhs_ptr); return CUOPT_SUCCESS; } @@ -368,13 +353,9 @@ cuopt_int_t cuOptGetConstraintLowerBounds(cuOptOptimizationProblem problem, if (lower_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& lower_bounds = + const std::vector& lower_bounds = problem_and_stream_view->op_problem->get_constraint_lower_bounds(); - raft::copy(lower_bounds_ptr, - lower_bounds.data(), - lower_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + std::copy(lower_bounds.begin(), lower_bounds.end(), lower_bounds_ptr); return CUOPT_SUCCESS; } @@ -385,13 +366,9 @@ cuopt_int_t cuOptGetConstraintUpperBounds(cuOptOptimizationProblem problem, if (upper_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& upper_bounds = + const std::vector& upper_bounds = problem_and_stream_view->op_problem->get_constraint_upper_bounds(); - raft::copy(upper_bounds_ptr, - upper_bounds.data(), - upper_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + std::copy(upper_bounds.begin(), upper_bounds.end(), upper_bounds_ptr); return CUOPT_SUCCESS; } @@ -402,13 +379,9 @@ cuopt_int_t cuOptGetVariableLowerBounds(cuOptOptimizationProblem problem, if (lower_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& lower_bounds = + const std::vector& lower_bounds = problem_and_stream_view->op_problem->get_variable_lower_bounds(); - raft::copy(lower_bounds_ptr, - lower_bounds.data(), - lower_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + std::copy(lower_bounds.begin(), lower_bounds.end(), lower_bounds_ptr); return CUOPT_SUCCESS; } @@ -419,13 +392,9 @@ cuopt_int_t cuOptGetVariableUpperBounds(cuOptOptimizationProblem problem, if (upper_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& upper_bounds = + const std::vector& upper_bounds = problem_and_stream_view->op_problem->get_variable_upper_bounds(); - raft::copy(upper_bounds_ptr, - upper_bounds.data(), - upper_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + 
std::copy(upper_bounds.begin(), upper_bounds.end(), upper_bounds_ptr); return CUOPT_SUCCESS; } @@ -435,17 +404,11 @@ cuopt_int_t cuOptGetVariableTypes(cuOptOptimizationProblem problem, char* variab if (variable_types_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& variable_types = + const std::vector& variable_types = problem_and_stream_view->op_problem->get_variable_types(); - std::vector variable_types_host(variable_types.size()); - raft::copy(variable_types_host.data(), - variable_types.data(), - variable_types.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); - for (size_t j = 0; j < variable_types_host.size(); j++) { - variable_types_ptr[j] = - variable_types_host[j] == var_t::INTEGER ? CUOPT_INTEGER : CUOPT_CONTINUOUS; + // Data is already on host - just convert enum to char + for (size_t j = 0; j < variable_types.size(); j++) { + variable_types_ptr[j] = variable_types[j] == var_t::INTEGER ? CUOPT_INTEGER : CUOPT_CONTINUOUS; } return CUOPT_SUCCESS; } @@ -735,24 +698,17 @@ cuopt_int_t cuOptGetPrimalSolution(cuOptSolution solution, cuopt_float_t* soluti mip_solution_t* mip_solution = static_cast*>( solution_and_stream_view->mip_solution_ptr); - const rmm::device_uvector& solution_values = mip_solution->get_solution(); - rmm::cuda_stream_view stream_view{}; - raft::copy(solution_values_ptr, - solution_values.data(), - solution_values.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + const std::vector& solution_values = mip_solution->get_solution(); + // Solution is already on host, just copy + std::copy(solution_values.begin(), solution_values.end(), solution_values_ptr); } else { optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& solution_values = + // Solution is already on host, just copy directly + const std::vector& solution_values = optimization_problem_solution->get_primal_solution(); - raft::copy(solution_values_ptr, - solution_values.data(), - solution_values.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + std::copy(solution_values.begin(), solution_values.end(), solution_values_ptr); } return CUOPT_SUCCESS; } @@ -843,13 +799,10 @@ cuopt_int_t cuOptGetDualSolution(cuOptSolution solution, cuopt_float_t* dual_sol optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& dual_solution = + // Solution is already on host, just copy directly + const std::vector& dual_solution = optimization_problem_solution->get_dual_solution(); - raft::copy(dual_solution_ptr, - dual_solution.data(), - dual_solution.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + std::copy(dual_solution.begin(), dual_solution.end(), dual_solution_ptr); return CUOPT_SUCCESS; } } @@ -884,13 +837,10 @@ cuopt_int_t cuOptGetReducedCosts(cuOptSolution solution, cuopt_float_t* reduced_ optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& reduced_cost = + // Solution is already on host, just copy directly + const std::vector& reduced_cost = optimization_problem_solution->get_reduced_cost(); - 
raft::copy(reduced_cost_ptr, - reduced_cost.data(), - reduced_cost.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + std::copy(reduced_cost.begin(), reduced_cost.end(), reduced_cost_ptr); return CUOPT_SUCCESS; } } diff --git a/cpp/src/linear_programming/gpu_optimization_problem.cu b/cpp/src/linear_programming/gpu_optimization_problem.cu new file mode 100644 index 000000000..ae4ce0ff3 --- /dev/null +++ b/cpp/src/linear_programming/gpu_optimization_problem.cu @@ -0,0 +1,695 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#include + +namespace cuopt::linear_programming { + +template +gpu_optimization_problem_t::gpu_optimization_problem_t(raft::handle_t const* handle_ptr) + : handle_ptr_(handle_ptr), + stream_view_(handle_ptr_->get_stream()), + maximize_{false}, + n_vars_{0}, + n_constraints_{0}, + A_{0, stream_view_}, + A_indices_{0, stream_view_}, + A_offsets_{0, stream_view_}, + b_{0, stream_view_}, + c_{0, stream_view_}, + variable_lower_bounds_{0, stream_view_}, + variable_upper_bounds_{0, stream_view_}, + constraint_lower_bounds_{0, stream_view_}, + constraint_upper_bounds_{0, stream_view_}, + row_types_{0, stream_view_}, + variable_types_{0, stream_view_}, + var_names_{}, + row_names_{} +{ + raft::common::nvtx::range fun_scope("optimization problem construction"); +} + +template +gpu_optimization_problem_t::gpu_optimization_problem_t( + const gpu_optimization_problem_t& other) + : handle_ptr_(other.get_handle_ptr()), + stream_view_(handle_ptr_->get_stream()), + maximize_{other.get_sense()}, + n_vars_{other.get_n_variables()}, + n_constraints_{other.get_n_constraints()}, + A_{other.get_constraint_matrix_values(), stream_view_}, + A_indices_{other.get_constraint_matrix_indices(), stream_view_}, + A_offsets_{other.get_constraint_matrix_offsets(), stream_view_}, + b_{other.get_constraint_bounds(), stream_view_}, + c_{other.get_objective_coefficients(), stream_view_}, + objective_scaling_factor_{other.get_objective_scaling_factor()}, + objective_offset_{other.get_objective_offset()}, + variable_lower_bounds_{other.get_variable_lower_bounds(), stream_view_}, + variable_upper_bounds_{other.get_variable_upper_bounds(), stream_view_}, + constraint_lower_bounds_{other.get_constraint_lower_bounds(), stream_view_}, + constraint_upper_bounds_{other.get_constraint_upper_bounds(), stream_view_}, + row_types_{other.get_row_types(), stream_view_}, + variable_types_{other.get_variable_types(), stream_view_}, + objective_name_{other.get_objective_name()}, + problem_name_{other.get_problem_name()}, + problem_category_{other.get_problem_category()}, + var_names_{other.get_variable_names()}, + row_names_{other.get_row_names()} +{ +} + +template +void 
gpu_optimization_problem_t::set_csr_constraint_matrix(const f_t* A_values, + i_t size_values, + const i_t* A_indices, + i_t size_indices, + const i_t* A_offsets, + i_t size_offsets) +{ + if (size_values != 0) { + cuopt_expects(A_values != nullptr, error_type_t::ValidationError, "A_values cannot be null"); + } + A_.resize(size_values, stream_view_); + raft::copy(A_.data(), A_values, size_values, stream_view_); + + if (size_indices != 0) { + cuopt_expects(A_indices != nullptr, error_type_t::ValidationError, "A_indices cannot be null"); + } + A_indices_.resize(size_indices, stream_view_); + raft::copy(A_indices_.data(), A_indices, size_indices, stream_view_); + + cuopt_expects(A_offsets != nullptr, error_type_t::ValidationError, "A_offsets cannot be null"); + A_offsets_.resize(size_offsets, stream_view_); + raft::copy(A_offsets_.data(), A_offsets, size_offsets, stream_view_); +} + +template +void gpu_optimization_problem_t::set_constraint_bounds(const f_t* b, i_t size) +{ + cuopt_expects(b != nullptr, error_type_t::ValidationError, "b cannot be null"); + b_.resize(size, stream_view_); + n_constraints_ = size; + raft::copy(b_.data(), b, size, stream_view_); +} + +template +void gpu_optimization_problem_t::set_objective_coefficients(const f_t* c, i_t size) +{ + cuopt_expects(c != nullptr, error_type_t::ValidationError, "c cannot be null"); + c_.resize(size, stream_view_); + n_vars_ = size; + raft::copy(c_.data(), c, size, stream_view_); +} + +template +void gpu_optimization_problem_t::set_objective_scaling_factor( + f_t objective_scaling_factor) +{ + objective_scaling_factor_ = objective_scaling_factor; +} + +template +void gpu_optimization_problem_t::set_objective_offset(f_t objective_offset) +{ + objective_offset_ = objective_offset; +} + +template +void gpu_optimization_problem_t::set_variable_lower_bounds( + const f_t* variable_lower_bounds, i_t size) +{ + if (size != 0) { + cuopt_expects(variable_lower_bounds != nullptr, + error_type_t::ValidationError, + "variable_lower_bounds cannot be null"); + } + n_vars_ = size; + variable_lower_bounds_.resize(size, stream_view_); + raft::copy(variable_lower_bounds_.data(), variable_lower_bounds, size, stream_view_); +} + +template +void gpu_optimization_problem_t::set_variable_upper_bounds( + const f_t* variable_upper_bounds, i_t size) +{ + if (size != 0) { + cuopt_expects(variable_upper_bounds != nullptr, + error_type_t::ValidationError, + "variable_upper_bounds cannot be null"); + } + n_vars_ = size; + variable_upper_bounds_.resize(size, stream_view_); + raft::copy(variable_upper_bounds_.data(), variable_upper_bounds, size, stream_view_); +} + +template +void gpu_optimization_problem_t::set_constraint_lower_bounds( + const f_t* constraint_lower_bounds, i_t size) +{ + if (size != 0) { + cuopt_expects(constraint_lower_bounds != nullptr, + error_type_t::ValidationError, + "constraint_lower_bounds cannot be null"); + } + n_constraints_ = size; + constraint_lower_bounds_.resize(size, stream_view_); + raft::copy(constraint_lower_bounds_.data(), constraint_lower_bounds, size, stream_view_); +} + +template +void gpu_optimization_problem_t::set_constraint_upper_bounds( + const f_t* constraint_upper_bounds, i_t size) +{ + if (size != 0) { + cuopt_expects(constraint_upper_bounds != nullptr, + error_type_t::ValidationError, + "constraint_upper_bounds cannot be null"); + } + n_constraints_ = size; + constraint_upper_bounds_.resize(size, stream_view_); + raft::copy(constraint_upper_bounds_.data(), constraint_upper_bounds, size, stream_view_); +} + +template 
+void gpu_optimization_problem_t::set_row_types(const char* row_types, i_t size) +{ + cuopt_expects(row_types != nullptr, error_type_t::ValidationError, "row_types cannot be null"); + n_constraints_ = size; + row_types_.resize(size, stream_view_); + raft::copy(row_types_.data(), row_types, size, stream_view_); +} + +template +void gpu_optimization_problem_t::set_variable_types(const var_t* var_types, i_t size) +{ + cuopt_expects(var_types != nullptr, error_type_t::ValidationError, "var_types cannot be null"); + variable_types_.resize(size, stream_view_); + raft::copy(variable_types_.data(), var_types, size, stream_view_); + // TODO when having a unified problem representation + // compute this in a single places (currently also in problem.cu) + i_t n_integer = thrust::count_if(handle_ptr_->get_thrust_policy(), + variable_types_.begin(), + variable_types_.end(), + [] __device__(auto val) { return val == var_t::INTEGER; }); + // by default it is LP + if (n_integer == size) { + problem_category_ = problem_category_t::IP; + } else if (n_integer > 0) { + problem_category_ = problem_category_t::MIP; + } +} + +template +void gpu_optimization_problem_t::set_problem_category(const problem_category_t& category) +{ + problem_category_ = category; +} + +template +void gpu_optimization_problem_t::set_objective_name(const std::string& objective_name) +{ + objective_name_ = objective_name; +} +template +void gpu_optimization_problem_t::set_problem_name(const std::string& problem_name) +{ + problem_name_ = problem_name; +} +template +void gpu_optimization_problem_t::set_variable_names( + const std::vector& variable_names) +{ + var_names_ = variable_names; +} +template +void gpu_optimization_problem_t::set_row_names(const std::vector& row_names) +{ + row_names_ = row_names; +} + +template +i_t gpu_optimization_problem_t::get_n_variables() const +{ + return n_vars_; +} + +template +i_t gpu_optimization_problem_t::get_n_constraints() const +{ + return n_constraints_; +} + +template +i_t gpu_optimization_problem_t::get_nnz() const +{ + return A_.size(); +} + +template +i_t gpu_optimization_problem_t::get_n_integers() const +{ + i_t n_integers = 0; + if (get_n_variables() != 0) { + auto enum_variable_types = cuopt::host_copy(get_variable_types()); + + for (size_t i = 0; i < enum_variable_types.size(); ++i) { + if (enum_variable_types[i] == var_t::INTEGER) { n_integers++; } + } + } + return n_integers; +} + +template +raft::handle_t const* gpu_optimization_problem_t::get_handle_ptr() const noexcept +{ + return handle_ptr_; +} + +template +const rmm::device_uvector& gpu_optimization_problem_t::get_constraint_matrix_values() + const +{ + return A_; +} + +template +rmm::device_uvector& gpu_optimization_problem_t::get_constraint_matrix_values() +{ + return A_; +} + +template +const rmm::device_uvector& +gpu_optimization_problem_t::get_constraint_matrix_indices() const +{ + return A_indices_; +} + +template +rmm::device_uvector& gpu_optimization_problem_t::get_constraint_matrix_indices() +{ + return A_indices_; +} + +template +const rmm::device_uvector& +gpu_optimization_problem_t::get_constraint_matrix_offsets() const +{ + return A_offsets_; +} + +template +rmm::device_uvector& gpu_optimization_problem_t::get_constraint_matrix_offsets() +{ + return A_offsets_; +} + +template +const rmm::device_uvector& gpu_optimization_problem_t::get_constraint_bounds() const +{ + return b_; +} + +template +rmm::device_uvector& gpu_optimization_problem_t::get_constraint_bounds() +{ + return b_; +} + +template +const 
rmm::device_uvector& gpu_optimization_problem_t::get_objective_coefficients() + const +{ + return c_; +} + +template +rmm::device_uvector& gpu_optimization_problem_t::get_objective_coefficients() +{ + return c_; +} + +template +f_t gpu_optimization_problem_t::get_objective_scaling_factor() const +{ + return objective_scaling_factor_; +} + +template +f_t gpu_optimization_problem_t::get_objective_offset() const +{ + return objective_offset_; +} + +template +const rmm::device_uvector& gpu_optimization_problem_t::get_variable_lower_bounds() + const +{ + return variable_lower_bounds_; +} + +template +const rmm::device_uvector& gpu_optimization_problem_t::get_variable_upper_bounds() + const +{ + return variable_upper_bounds_; +} + +template +rmm::device_uvector& gpu_optimization_problem_t::get_variable_lower_bounds() +{ + return variable_lower_bounds_; +} + +template +rmm::device_uvector& gpu_optimization_problem_t::get_variable_upper_bounds() +{ + return variable_upper_bounds_; +} +template +const rmm::device_uvector& gpu_optimization_problem_t::get_variable_types() const +{ + return variable_types_; +} + +template +const rmm::device_uvector& gpu_optimization_problem_t::get_constraint_lower_bounds() + const +{ + return constraint_lower_bounds_; +} + +template +const rmm::device_uvector& gpu_optimization_problem_t::get_constraint_upper_bounds() + const +{ + return constraint_upper_bounds_; +} + +template +rmm::device_uvector& gpu_optimization_problem_t::get_constraint_lower_bounds() +{ + return constraint_lower_bounds_; +} + +template +rmm::device_uvector& gpu_optimization_problem_t::get_constraint_upper_bounds() +{ + return constraint_upper_bounds_; +} + +template +const rmm::device_uvector& gpu_optimization_problem_t::get_row_types() const +{ + return row_types_; +} + +template +std::string gpu_optimization_problem_t::get_objective_name() const +{ + return objective_name_; +} + +template +std::string gpu_optimization_problem_t::get_problem_name() const +{ + return problem_name_; +} + +template +problem_category_t gpu_optimization_problem_t::get_problem_category() const +{ + return problem_category_; +} + +template +const std::vector& gpu_optimization_problem_t::get_variable_names() const +{ + return var_names_; +} + +template +const std::vector& gpu_optimization_problem_t::get_row_names() const +{ + return row_names_; +} + +template +bool gpu_optimization_problem_t::get_sense() const +{ + return maximize_; +} + +template +bool gpu_optimization_problem_t::empty() const +{ + return n_vars_ == 0 && n_constraints_ == 0; +} + +template +typename gpu_optimization_problem_t::view_t gpu_optimization_problem_t::view() + const +{ + gpu_optimization_problem_t::view_t v; + v.n_vars = get_n_variables(); + v.n_constraints = get_n_constraints(); + v.nnz = get_nnz(); + v.A = raft::device_span{const_cast(get_constraint_matrix_values().data()), + get_constraint_matrix_values().size()}; + v.A_indices = raft::device_span{get_constraint_matrix_indices().data(), + get_constraint_matrix_indices().size()}; + v.A_offsets = raft::device_span{get_constraint_matrix_offsets().data(), + get_constraint_matrix_offsets().size()}; + v.b = + raft::device_span{get_constraint_bounds().data(), get_constraint_bounds().size()}; + v.c = raft::device_span{get_objective_coefficients().data(), + get_objective_coefficients().size()}; + v.variable_lower_bounds = raft::device_span{get_variable_lower_bounds().data(), + get_variable_lower_bounds().size()}; + v.variable_upper_bounds = raft::device_span{get_variable_upper_bounds().data(), + 
get_variable_upper_bounds().size()}; + v.constraint_lower_bounds = raft::device_span{get_constraint_lower_bounds().data(), + get_constraint_lower_bounds().size()}; + v.constraint_upper_bounds = raft::device_span{get_constraint_upper_bounds().data(), + get_constraint_upper_bounds().size()}; + return v; +} + +template +void gpu_optimization_problem_t::set_maximize(bool _maximize) +{ + maximize_ = _maximize; +} + +template +void gpu_optimization_problem_t::write_to_mps(const std::string& mps_file_path) +{ + cuopt::mps_parser::data_model_view_t data_model_view; + + // Set optimization sense + data_model_view.set_maximize(get_sense()); + + // Copy to host + auto constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values()); + auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices()); + auto constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets()); + auto constraint_bounds = cuopt::host_copy(get_constraint_bounds()); + auto objective_coefficients = cuopt::host_copy(get_objective_coefficients()); + auto variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds()); + auto variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds()); + auto constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds()); + auto constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds()); + auto row_types = cuopt::host_copy(get_row_types()); + + // Set constraint matrix in CSR format + if (get_nnz() != 0) { + data_model_view.set_csr_constraint_matrix(constraint_matrix_values.data(), + constraint_matrix_values.size(), + constraint_matrix_indices.data(), + constraint_matrix_indices.size(), + constraint_matrix_offsets.data(), + constraint_matrix_offsets.size()); + } + + // Set constraint bounds (RHS) + if (get_n_constraints() != 0) { + data_model_view.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + } + + // Set objective coefficients + if (get_n_variables() != 0) { + data_model_view.set_objective_coefficients(objective_coefficients.data(), + objective_coefficients.size()); + } + + // Set objective scaling and offset + data_model_view.set_objective_scaling_factor(get_objective_scaling_factor()); + data_model_view.set_objective_offset(get_objective_offset()); + + // Set variable bounds + if (get_n_variables() != 0) { + data_model_view.set_variable_lower_bounds(variable_lower_bounds.data(), + variable_lower_bounds.size()); + data_model_view.set_variable_upper_bounds(variable_upper_bounds.data(), + variable_upper_bounds.size()); + } + + // Set row types (constraint types) + if (get_row_types().size() != 0) { + data_model_view.set_row_types(row_types.data(), row_types.size()); + } + + // Set constraint bounds (lower and upper) + if (get_constraint_lower_bounds().size() != 0 && get_constraint_upper_bounds().size() != 0) { + data_model_view.set_constraint_lower_bounds(constraint_lower_bounds.data(), + constraint_lower_bounds.size()); + data_model_view.set_constraint_upper_bounds(constraint_upper_bounds.data(), + constraint_upper_bounds.size()); + } + + // Create a temporary vector to hold the converted variable types + std::vector variable_types(get_n_variables()); + // Set variable types (convert from enum to char) + if (get_n_variables() != 0) { + auto enum_variable_types = cuopt::host_copy(get_variable_types()); + + // Convert enum types to char types + for (size_t i = 0; i < variable_types.size(); ++i) { + variable_types[i] = (enum_variable_types[i] == var_t::INTEGER) ? 
'I' : 'C'; + } + + data_model_view.set_variable_types(variable_types.data(), variable_types.size()); + } + + // Set problem and variable names if available + if (!get_problem_name().empty()) { data_model_view.set_problem_name(get_problem_name()); } + + if (!get_objective_name().empty()) { data_model_view.set_objective_name(get_objective_name()); } + + if (!get_variable_names().empty()) { data_model_view.set_variable_names(get_variable_names()); } + + if (!get_row_names().empty()) { data_model_view.set_row_names(get_row_names()); } + + cuopt::mps_parser::write_mps(data_model_view, mps_file_path); +} + +template +void gpu_optimization_problem_t::print_scaling_information() const +{ + std::vector constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values()); + std::vector constraint_rhs = cuopt::host_copy(get_constraint_bounds()); + std::vector objective_coefficients = cuopt::host_copy(get_objective_coefficients()); + std::vector variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds()); + std::vector variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds()); + std::vector constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds()); + std::vector constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds()); + + auto findMaxAbs = [](const std::vector& vec) -> f_t { + if (vec.empty()) { return 0.0; } + const f_t inf = std::numeric_limits::infinity(); + + const size_t sz = vec.size(); + f_t max_abs_val = 0.0; + for (size_t i = 0; i < sz; ++i) { + const f_t val = std::abs(vec[i]); + if (val < inf) { max_abs_val = std::max(max_abs_val, val); } + } + return max_abs_val; + }; + + auto findMinAbs = [](const std::vector& vec) -> f_t { + if (vec.empty()) { return 0.0; } + const size_t sz = vec.size(); + const f_t inf = std::numeric_limits::infinity(); + f_t min_abs_val = inf; + for (size_t i = 0; i < sz; ++i) { + const f_t val = std::abs(vec[i]); + if (val > 0.0) { min_abs_val = std::min(min_abs_val, val); } + } + return min_abs_val < inf ? min_abs_val : 0.0; + }; + + f_t A_max = findMaxAbs(constraint_matrix_values); + f_t A_min = findMinAbs(constraint_matrix_values); + f_t b_max = findMaxAbs(constraint_rhs); + f_t b_min = findMinAbs(constraint_rhs); + f_t c_max = findMaxAbs(objective_coefficients); + f_t c_min = findMinAbs(objective_coefficients); + f_t x_lower_max = findMaxAbs(variable_lower_bounds); + f_t x_lower_min = findMinAbs(variable_lower_bounds); + f_t x_upper_max = findMaxAbs(variable_upper_bounds); + f_t x_upper_min = findMinAbs(variable_upper_bounds); + f_t cstr_lower_max = findMaxAbs(constraint_lower_bounds); + f_t cstr_lower_min = findMinAbs(constraint_lower_bounds); + f_t cstr_upper_max = findMaxAbs(constraint_upper_bounds); + f_t cstr_upper_min = findMinAbs(constraint_upper_bounds); + + f_t rhs_max = std::max(b_max, std::max(cstr_lower_max, cstr_upper_max)); + f_t rhs_min = std::min(b_min, std::min(cstr_lower_min, cstr_upper_min)); + + f_t bound_max = std::max(x_upper_max, x_lower_max); + f_t bound_min = std::min(x_upper_min, x_lower_min); + + CUOPT_LOG_INFO("Problem scaling:"); + CUOPT_LOG_INFO("Objective coefficents range: [%.0e, %.0e]", c_min, c_max); + CUOPT_LOG_INFO("Constraint matrix coefficients range: [%.0e, %.0e]", A_min, A_max); + CUOPT_LOG_INFO("Constraint rhs / bounds range: [%.0e, %.0e]", rhs_min, rhs_max); + CUOPT_LOG_INFO("Variable bounds range: [%.0e, %.0e]", bound_min, bound_max); + + auto safelog10 = [](f_t x) { return x > 0 ? 
std::log10(x) : 0.0; }; + + f_t obj_range = safelog10(c_max) - safelog10(c_min); + f_t A_range = safelog10(A_max) - safelog10(A_min); + f_t rhs_range = safelog10(rhs_max) - safelog10(rhs_min); + f_t bound_range = safelog10(bound_max) - safelog10(bound_min); + + if (obj_range >= 6.0 || A_range >= 6.0 || rhs_range >= 6.0 || bound_range >= 6.0) { + CUOPT_LOG_INFO( + "Warning: input problem contains a large range of coefficients: consider reformulating to " + "avoid numerical difficulties."); + } + CUOPT_LOG_INFO(""); +} + +// NOTE: Explicitly instantiate all types here in order to avoid linker error +#if MIP_INSTANTIATE_FLOAT +template class gpu_optimization_problem_t; +#endif +#if MIP_INSTANTIATE_DOUBLE +template class gpu_optimization_problem_t; +#endif + +// TODO current raft to cusparse wrappers only support int64_t +// can be CUSPARSE_INDEX_16U, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_64I + +} // namespace cuopt::linear_programming diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu index bcd2f9c4a..21162f9cf 100644 --- a/cpp/src/linear_programming/optimization_problem.cu +++ b/cpp/src/linear_programming/optimization_problem.cu @@ -36,23 +36,21 @@ namespace cuopt::linear_programming { template -optimization_problem_t::optimization_problem_t(raft::handle_t const* handle_ptr) - : handle_ptr_(handle_ptr), - stream_view_(handle_ptr_->get_stream()), - maximize_{false}, +optimization_problem_t::optimization_problem_t() + : maximize_{false}, n_vars_{0}, n_constraints_{0}, - A_{0, stream_view_}, - A_indices_{0, stream_view_}, - A_offsets_{0, stream_view_}, - b_{0, stream_view_}, - c_{0, stream_view_}, - variable_lower_bounds_{0, stream_view_}, - variable_upper_bounds_{0, stream_view_}, - constraint_lower_bounds_{0, stream_view_}, - constraint_upper_bounds_{0, stream_view_}, - row_types_{0, stream_view_}, - variable_types_{0, stream_view_}, + A_{}, + A_indices_{}, + A_offsets_{}, + b_{}, + c_{}, + variable_lower_bounds_{}, + variable_upper_bounds_{}, + constraint_lower_bounds_{}, + constraint_upper_bounds_{}, + row_types_{}, + variable_types_{}, var_names_{}, row_names_{} { @@ -62,24 +60,22 @@ optimization_problem_t::optimization_problem_t(raft::handle_t const* h template optimization_problem_t::optimization_problem_t( const optimization_problem_t& other) - : handle_ptr_(other.get_handle_ptr()), - stream_view_(handle_ptr_->get_stream()), - maximize_{other.get_sense()}, + : maximize_{other.get_sense()}, n_vars_{other.get_n_variables()}, n_constraints_{other.get_n_constraints()}, - A_{other.get_constraint_matrix_values(), stream_view_}, - A_indices_{other.get_constraint_matrix_indices(), stream_view_}, - A_offsets_{other.get_constraint_matrix_offsets(), stream_view_}, - b_{other.get_constraint_bounds(), stream_view_}, - c_{other.get_objective_coefficients(), stream_view_}, + A_{other.get_constraint_matrix_values()}, + A_indices_{other.get_constraint_matrix_indices()}, + A_offsets_{other.get_constraint_matrix_offsets()}, + b_{other.get_constraint_bounds()}, + c_{other.get_objective_coefficients()}, objective_scaling_factor_{other.get_objective_scaling_factor()}, objective_offset_{other.get_objective_offset()}, - variable_lower_bounds_{other.get_variable_lower_bounds(), stream_view_}, - variable_upper_bounds_{other.get_variable_upper_bounds(), stream_view_}, - constraint_lower_bounds_{other.get_constraint_lower_bounds(), stream_view_}, - constraint_upper_bounds_{other.get_constraint_upper_bounds(), stream_view_}, - 
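Note: the coefficient-range warning emitted by print_scaling_information above triggers when any of the four log10 ranges spans six or more orders of magnitude. A quick worked instance of that rule, with made-up values and the same zero-guarded log10:

#include <cmath>
#include <cstdio>

int main()
{
  auto safelog10 = [](double x) { return x > 0 ? std::log10(x) : 0.0; };

  // Hypothetical objective coefficients spanning [1e-4, 1e3].
  const double c_min = 1e-4, c_max = 1e3;
  const double obj_range = safelog10(c_max) - safelog10(c_min);  // 3 - (-4) = 7

  if (obj_range >= 6.0) {
    std::printf("%.0f orders of magnitude -> reformulation warning\n", obj_range);
  }
  return 0;
}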
row_types_{other.get_row_types(), stream_view_}, - variable_types_{other.get_variable_types(), stream_view_}, + variable_lower_bounds_{other.get_variable_lower_bounds()}, + variable_upper_bounds_{other.get_variable_upper_bounds()}, + constraint_lower_bounds_{other.get_constraint_lower_bounds()}, + constraint_upper_bounds_{other.get_constraint_upper_bounds()}, + row_types_{other.get_row_types()}, + variable_types_{other.get_variable_types()}, objective_name_{other.get_objective_name()}, problem_name_{other.get_problem_name()}, problem_category_{other.get_problem_category()}, @@ -99,36 +95,36 @@ void optimization_problem_t::set_csr_constraint_matrix(const f_t* A_va if (size_values != 0) { cuopt_expects(A_values != nullptr, error_type_t::ValidationError, "A_values cannot be null"); } - A_.resize(size_values, stream_view_); - raft::copy(A_.data(), A_values, size_values, stream_view_); + A_.resize(size_values); + if (size_values > 0) { std::copy(A_values, A_values + size_values, A_.begin()); } if (size_indices != 0) { cuopt_expects(A_indices != nullptr, error_type_t::ValidationError, "A_indices cannot be null"); } - A_indices_.resize(size_indices, stream_view_); - raft::copy(A_indices_.data(), A_indices, size_indices, stream_view_); + A_indices_.resize(size_indices); + if (size_indices > 0) { std::copy(A_indices, A_indices + size_indices, A_indices_.begin()); } cuopt_expects(A_offsets != nullptr, error_type_t::ValidationError, "A_offsets cannot be null"); - A_offsets_.resize(size_offsets, stream_view_); - raft::copy(A_offsets_.data(), A_offsets, size_offsets, stream_view_); + A_offsets_.resize(size_offsets); + if (size_offsets > 0) { std::copy(A_offsets, A_offsets + size_offsets, A_offsets_.begin()); } } template void optimization_problem_t::set_constraint_bounds(const f_t* b, i_t size) { cuopt_expects(b != nullptr, error_type_t::ValidationError, "b cannot be null"); - b_.resize(size, stream_view_); + b_.resize(size); n_constraints_ = size; - raft::copy(b_.data(), b, size, stream_view_); + if (size > 0) { std::copy(b, b + size, b_.begin()); } } template void optimization_problem_t::set_objective_coefficients(const f_t* c, i_t size) { cuopt_expects(c != nullptr, error_type_t::ValidationError, "c cannot be null"); - c_.resize(size, stream_view_); + c_.resize(size); n_vars_ = size; - raft::copy(c_.data(), c, size, stream_view_); + if (size > 0) { std::copy(c, c + size, c_.begin()); } } template @@ -153,8 +149,10 @@ void optimization_problem_t::set_variable_lower_bounds(const f_t* vari "variable_lower_bounds cannot be null"); } n_vars_ = size; - variable_lower_bounds_.resize(size, stream_view_); - raft::copy(variable_lower_bounds_.data(), variable_lower_bounds, size, stream_view_); + variable_lower_bounds_.resize(size); + if (size > 0) { + std::copy(variable_lower_bounds, variable_lower_bounds + size, variable_lower_bounds_.begin()); + } } template @@ -167,8 +165,10 @@ void optimization_problem_t::set_variable_upper_bounds(const f_t* vari "variable_upper_bounds cannot be null"); } n_vars_ = size; - variable_upper_bounds_.resize(size, stream_view_); - raft::copy(variable_upper_bounds_.data(), variable_upper_bounds, size, stream_view_); + variable_upper_bounds_.resize(size); + if (size > 0) { + std::copy(variable_upper_bounds, variable_upper_bounds + size, variable_upper_bounds_.begin()); + } } template @@ -181,8 +181,11 @@ void optimization_problem_t::set_constraint_lower_bounds( "constraint_lower_bounds cannot be null"); } n_constraints_ = size; - constraint_lower_bounds_.resize(size, 
stream_view_); - raft::copy(constraint_lower_bounds_.data(), constraint_lower_bounds, size, stream_view_); + constraint_lower_bounds_.resize(size); + if (size > 0) { + std::copy( + constraint_lower_bounds, constraint_lower_bounds + size, constraint_lower_bounds_.begin()); + } } template @@ -195,8 +198,11 @@ void optimization_problem_t::set_constraint_upper_bounds( "constraint_upper_bounds cannot be null"); } n_constraints_ = size; - constraint_upper_bounds_.resize(size, stream_view_); - raft::copy(constraint_upper_bounds_.data(), constraint_upper_bounds, size, stream_view_); + constraint_upper_bounds_.resize(size); + if (size > 0) { + std::copy( + constraint_upper_bounds, constraint_upper_bounds + size, constraint_upper_bounds_.begin()); + } } template @@ -204,22 +210,19 @@ void optimization_problem_t::set_row_types(const char* row_types, i_t { cuopt_expects(row_types != nullptr, error_type_t::ValidationError, "row_types cannot be null"); n_constraints_ = size; - row_types_.resize(size, stream_view_); - raft::copy(row_types_.data(), row_types, size, stream_view_); + row_types_.resize(size); + if (size > 0) { std::copy(row_types, row_types + size, row_types_.begin()); } } template void optimization_problem_t::set_variable_types(const var_t* var_types, i_t size) { cuopt_expects(var_types != nullptr, error_type_t::ValidationError, "var_types cannot be null"); - variable_types_.resize(size, stream_view_); - raft::copy(variable_types_.data(), var_types, size, stream_view_); - // TODO when having a unified problem representation - // compute this in a single places (currently also in problem.cu) - i_t n_integer = thrust::count_if(handle_ptr_->get_thrust_policy(), - variable_types_.begin(), - variable_types_.end(), - [] __device__(auto val) { return val == var_t::INTEGER; }); + variable_types_.resize(size); + if (size > 0) { std::copy(var_types, var_types + size, variable_types_.begin()); } + // Count integer variables on host + i_t n_integer = std::count_if( + variable_types_.begin(), variable_types_.end(), [](auto val) { return val == var_t::INTEGER; }); // by default it is LP if (n_integer == size) { problem_category_ = problem_category_t::IP; @@ -279,80 +282,70 @@ i_t optimization_problem_t::get_n_integers() const { i_t n_integers = 0; if (get_n_variables() != 0) { - auto enum_variable_types = cuopt::host_copy(get_variable_types()); - - for (size_t i = 0; i < enum_variable_types.size(); ++i) { - if (enum_variable_types[i] == var_t::INTEGER) { n_integers++; } + const auto& variable_types = get_variable_types(); + for (size_t i = 0; i < variable_types.size(); ++i) { + if (variable_types[i] == var_t::INTEGER) { n_integers++; } } } return n_integers; } template -raft::handle_t const* optimization_problem_t::get_handle_ptr() const noexcept -{ - return handle_ptr_; -} - -template -const rmm::device_uvector& optimization_problem_t::get_constraint_matrix_values() - const +const std::vector& optimization_problem_t::get_constraint_matrix_values() const { return A_; } template -rmm::device_uvector& optimization_problem_t::get_constraint_matrix_values() +std::vector& optimization_problem_t::get_constraint_matrix_values() { return A_; } template -const rmm::device_uvector& optimization_problem_t::get_constraint_matrix_indices() - const +const std::vector& optimization_problem_t::get_constraint_matrix_indices() const { return A_indices_; } template -rmm::device_uvector& optimization_problem_t::get_constraint_matrix_indices() +std::vector& optimization_problem_t::get_constraint_matrix_indices() { return 
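Note: the host rewrite of set_variable_types keeps the same classification rule as the GPU path: all variables integer means IP, any integer means MIP, none means LP. A self-contained illustration of that rule; the enums and the classify helper are stand-ins, not cuOpt types:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

enum class var_kind { CONTINUOUS, INTEGER };  // stand-in for var_t
enum class category { LP, MIP, IP };          // stand-in for problem_category_t

category classify(const std::vector<var_kind>& types)
{
  const auto n_integer = std::count_if(
    types.begin(), types.end(), [](var_kind v) { return v == var_kind::INTEGER; });
  if (!types.empty() && n_integer == static_cast<std::ptrdiff_t>(types.size())) {
    return category::IP;
  }
  if (n_integer > 0) { return category::MIP; }
  return category::LP;  // default when no integer variables are present
}

int main()
{
  std::vector<var_kind> mixed{var_kind::CONTINUOUS, var_kind::INTEGER};
  std::printf("mixed -> %s\n", classify(mixed) == category::MIP ? "MIP" : "other");
  return 0;
}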
A_indices_; } template -const rmm::device_uvector& optimization_problem_t::get_constraint_matrix_offsets() - const +const std::vector& optimization_problem_t::get_constraint_matrix_offsets() const { return A_offsets_; } template -rmm::device_uvector& optimization_problem_t::get_constraint_matrix_offsets() +std::vector& optimization_problem_t::get_constraint_matrix_offsets() { return A_offsets_; } template -const rmm::device_uvector& optimization_problem_t::get_constraint_bounds() const +const std::vector& optimization_problem_t::get_constraint_bounds() const { return b_; } template -rmm::device_uvector& optimization_problem_t::get_constraint_bounds() +std::vector& optimization_problem_t::get_constraint_bounds() { return b_; } template -const rmm::device_uvector& optimization_problem_t::get_objective_coefficients() const +const std::vector& optimization_problem_t::get_objective_coefficients() const { return c_; } template -rmm::device_uvector& optimization_problem_t::get_objective_coefficients() +std::vector& optimization_problem_t::get_objective_coefficients() { return c_; } @@ -370,62 +363,60 @@ f_t optimization_problem_t::get_objective_offset() const } template -const rmm::device_uvector& optimization_problem_t::get_variable_lower_bounds() const +const std::vector& optimization_problem_t::get_variable_lower_bounds() const { return variable_lower_bounds_; } template -const rmm::device_uvector& optimization_problem_t::get_variable_upper_bounds() const +const std::vector& optimization_problem_t::get_variable_upper_bounds() const { return variable_upper_bounds_; } template -rmm::device_uvector& optimization_problem_t::get_variable_lower_bounds() +std::vector& optimization_problem_t::get_variable_lower_bounds() { return variable_lower_bounds_; } template -rmm::device_uvector& optimization_problem_t::get_variable_upper_bounds() +std::vector& optimization_problem_t::get_variable_upper_bounds() { return variable_upper_bounds_; } template -const rmm::device_uvector& optimization_problem_t::get_variable_types() const +const std::vector& optimization_problem_t::get_variable_types() const { return variable_types_; } template -const rmm::device_uvector& optimization_problem_t::get_constraint_lower_bounds() - const +const std::vector& optimization_problem_t::get_constraint_lower_bounds() const { return constraint_lower_bounds_; } template -const rmm::device_uvector& optimization_problem_t::get_constraint_upper_bounds() - const +const std::vector& optimization_problem_t::get_constraint_upper_bounds() const { return constraint_upper_bounds_; } template -rmm::device_uvector& optimization_problem_t::get_constraint_lower_bounds() +std::vector& optimization_problem_t::get_constraint_lower_bounds() { return constraint_lower_bounds_; } template -rmm::device_uvector& optimization_problem_t::get_constraint_upper_bounds() +std::vector& optimization_problem_t::get_constraint_upper_bounds() { return constraint_upper_bounds_; } template -const rmm::device_uvector& optimization_problem_t::get_row_types() const +const std::vector& optimization_problem_t::get_row_types() const { return row_types_; } @@ -514,17 +505,17 @@ void optimization_problem_t::write_to_mps(const std::string& mps_file_ // Set optimization sense data_model_view.set_maximize(get_sense()); - // Copy to host - auto constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values()); - auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices()); - auto constraint_matrix_offsets = 
cuopt::host_copy(get_constraint_matrix_offsets()); - auto constraint_bounds = cuopt::host_copy(get_constraint_bounds()); - auto objective_coefficients = cuopt::host_copy(get_objective_coefficients()); - auto variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds()); - auto variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds()); - auto constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds()); - auto constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds()); - auto row_types = cuopt::host_copy(get_row_types()); + // Data is already on host - just get references + const auto& constraint_matrix_values = get_constraint_matrix_values(); + const auto& constraint_matrix_indices = get_constraint_matrix_indices(); + const auto& constraint_matrix_offsets = get_constraint_matrix_offsets(); + const auto& constraint_bounds = get_constraint_bounds(); + const auto& objective_coefficients = get_objective_coefficients(); + const auto& variable_lower_bounds = get_variable_lower_bounds(); + const auto& variable_upper_bounds = get_variable_upper_bounds(); + const auto& constraint_lower_bounds = get_constraint_lower_bounds(); + const auto& constraint_upper_bounds = get_constraint_upper_bounds(); + const auto& row_types = get_row_types(); // Set constraint matrix in CSR format if (get_nnz() != 0) { @@ -576,7 +567,7 @@ void optimization_problem_t::write_to_mps(const std::string& mps_file_ std::vector variable_types(get_n_variables()); // Set variable types (convert from enum to char) if (get_n_variables() != 0) { - auto enum_variable_types = cuopt::host_copy(get_variable_types()); + const auto& enum_variable_types = get_variable_types(); // Convert enum types to char types for (size_t i = 0; i < variable_types.size(); ++i) { @@ -601,13 +592,14 @@ void optimization_problem_t::write_to_mps(const std::string& mps_file_ template void optimization_problem_t::print_scaling_information() const { - std::vector constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values()); - std::vector constraint_rhs = cuopt::host_copy(get_constraint_bounds()); - std::vector objective_coefficients = cuopt::host_copy(get_objective_coefficients()); - std::vector variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds()); - std::vector variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds()); - std::vector constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds()); - std::vector constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds()); + // Data is already on host - just get references + const std::vector& constraint_matrix_values = get_constraint_matrix_values(); + const std::vector& constraint_rhs = get_constraint_bounds(); + const std::vector& objective_coefficients = get_objective_coefficients(); + const std::vector& variable_lower_bounds = get_variable_lower_bounds(); + const std::vector& variable_upper_bounds = get_variable_upper_bounds(); + const std::vector& constraint_lower_bounds = get_constraint_lower_bounds(); + const std::vector& constraint_upper_bounds = get_constraint_upper_bounds(); auto findMaxAbs = [](const std::vector& vec) -> f_t { if (vec.empty()) { return 0.0; } diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index 4b1fc7891..48218432b 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -127,7 +127,7 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, initial_primal_{0, stream_view_}, initial_dual_{0, 
stream_view_}, - best_primal_solution_so_far{pdlp_termination_status_t::TimeLimit, stream_view_}, + best_primal_solution_so_far{pdlp_termination_status_t::TimeLimit}, inside_mip_{false} { if (settings.has_initial_primal_solution()) { @@ -253,6 +253,17 @@ void pdlp_solver_t::set_initial_primal_solution( stream_view_); } +template +void pdlp_solver_t::set_initial_primal_solution( + const std::vector& initial_primal_solution) +{ + initial_primal_.resize(initial_primal_solution.size(), stream_view_); + raft::copy(initial_primal_.data(), + initial_primal_solution.data(), + initial_primal_solution.size(), + stream_view_); +} + template void pdlp_solver_t::set_initial_dual_solution( const rmm::device_uvector& initial_dual_solution) @@ -262,6 +273,15 @@ void pdlp_solver_t::set_initial_dual_solution( initial_dual_.data(), initial_dual_solution.data(), initial_dual_solution.size(), stream_view_); } +template +void pdlp_solver_t::set_initial_dual_solution( + const std::vector& initial_dual_solution) +{ + initial_dual_.resize(initial_dual_solution.size(), stream_view_); + raft::copy( + initial_dual_.data(), initial_dual_solution.data(), initial_dual_solution.size(), stream_view_); +} + static bool time_limit_reached(const timer_t& timer) { return timer.check_time_limit(); } template @@ -482,16 +502,39 @@ void pdlp_solver_t::record_best_primal_so_far( template pdlp_warm_start_data_t pdlp_solver_t::get_filled_warmed_start_data() { + // Convert device vectors to host for warm start data + auto device_to_host = [this](const rmm::device_uvector& device_vec) { + std::vector host_vec(device_vec.size()); + raft::copy(host_vec.data(), device_vec.data(), device_vec.size(), stream_view_); + return host_vec; + }; + + auto host_primal = device_to_host(pdhg_solver_.get_primal_solution()); + auto host_dual = device_to_host(pdhg_solver_.get_dual_solution()); + auto host_primal_avg = device_to_host(unscaled_primal_avg_solution_); + auto host_dual_avg = device_to_host(unscaled_dual_avg_solution_); + auto host_AtY = device_to_host(pdhg_solver_.get_saddle_point_state().get_current_AtY()); + auto host_sum_primal = + device_to_host(restart_strategy_.weighted_average_solution_.sum_primal_solutions_); + auto host_sum_dual = + device_to_host(restart_strategy_.weighted_average_solution_.sum_dual_solutions_); + auto host_restart_primal = + device_to_host(restart_strategy_.last_restart_duality_gap_.primal_solution_); + auto host_restart_dual = + device_to_host(restart_strategy_.last_restart_duality_gap_.dual_solution_); + + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); + return pdlp_warm_start_data_t( - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), - unscaled_primal_avg_solution_, - unscaled_dual_avg_solution_, - pdhg_solver_.get_saddle_point_state().get_current_AtY(), - restart_strategy_.weighted_average_solution_.sum_primal_solutions_, - restart_strategy_.weighted_average_solution_.sum_dual_solutions_, - restart_strategy_.last_restart_duality_gap_.primal_solution_, - restart_strategy_.last_restart_duality_gap_.dual_solution_, + std::move(host_primal), + std::move(host_dual), + std::move(host_primal_avg), + std::move(host_dual_avg), + std::move(host_AtY), + std::move(host_sum_primal), + std::move(host_sum_dual), + std::move(host_restart_primal), + std::move(host_restart_dual), get_primal_weight_h(), get_step_size_h(), total_pdlp_iterations_, @@ -822,8 +865,7 @@ std::optional> pdlp_solver_t #endif print_final_termination_criteria( timer, current_termination_strategy_.get_convergence_information(), 
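Note: the device_to_host lambda above stages each rmm::device_uvector into a std::vector via raft::copy, then synchronizes the stream once before the warm-start object is assembled from the host copies. The same staging step, expressed with the plain CUDA runtime API purely for illustration (this is not the cuOpt helper itself):

#include <cuda_runtime.h>
#include <cstddef>
#include <cstdio>
#include <vector>

// Stage a device buffer into a host std::vector on the given stream.
std::vector<double> device_to_host(const double* d_ptr, std::size_t n, cudaStream_t stream)
{
  std::vector<double> h(n);
  cudaMemcpyAsync(h.data(), d_ptr, n * sizeof(double), cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);  // make the copy visible before the host vector is used
  return h;
}

int main()
{
  const std::size_t n = 4;
  std::vector<double> src{1.0, 2.0, 3.0, 4.0};
  double* d = nullptr;
  cudaMalloc(&d, n * sizeof(double));
  cudaMemcpy(d, src.data(), n * sizeof(double), cudaMemcpyHostToDevice);

  cudaStream_t stream = 0;  // legacy default stream
  auto h = device_to_host(d, n, stream);
  std::printf("h[2] = %f\n", h[2]);

  cudaFree(d);
  return 0;
}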
termination_current); - return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError, - stream_view_}; + return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError}; } // If not infeasible and not pdlp_termination_status_t::Optimal and no error, record best so far @@ -1339,8 +1381,7 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co if (pdlp_hyper_params::never_restart_to_average) restart_strategy_.increment_iteration_since_last_restart(); } - return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError, - stream_view_}; + return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError}; } template diff --git a/cpp/src/linear_programming/pdlp.cuh b/cpp/src/linear_programming/pdlp.cuh index 72d6b14d4..1911028c5 100644 --- a/cpp/src/linear_programming/pdlp.cuh +++ b/cpp/src/linear_programming/pdlp.cuh @@ -84,7 +84,9 @@ class pdlp_solver_t { // Interface to let MIP set an initial solution // Users will keep on using the optimization_problem to provide an initial solution void set_initial_primal_solution(const rmm::device_uvector& initial_primal_solution); + void set_initial_primal_solution(const std::vector& initial_primal_solution); void set_initial_dual_solution(const rmm::device_uvector& initial_dual_solution); + void set_initial_dual_solution(const std::vector& initial_dual_solution); void set_initial_primal_weight(f_t initial_primal_weight); void set_initial_step_size(f_t initial_primal_weight); void set_initial_k(i_t initial_k); diff --git a/cpp/src/linear_programming/pdlp_warm_start_data.cu b/cpp/src/linear_programming/pdlp_warm_start_data.cu index f805f994a..69187228a 100644 --- a/cpp/src/linear_programming/pdlp_warm_start_data.cu +++ b/cpp/src/linear_programming/pdlp_warm_start_data.cu @@ -30,15 +30,15 @@ namespace cuopt::linear_programming { template pdlp_warm_start_data_t::pdlp_warm_start_data_t( - rmm::device_uvector& current_primal_solution, - rmm::device_uvector& current_dual_solution, - rmm::device_uvector& initial_primal_average, - rmm::device_uvector& initial_dual_average, - rmm::device_uvector& current_ATY, - rmm::device_uvector& sum_primal_solutions, - rmm::device_uvector& sum_dual_solutions, - rmm::device_uvector& last_restart_duality_gap_primal_solution, - rmm::device_uvector& last_restart_duality_gap_dual_solution, + std::vector current_primal_solution, + std::vector current_dual_solution, + std::vector initial_primal_average, + std::vector initial_dual_average, + std::vector current_ATY, + std::vector sum_primal_solutions, + std::vector sum_dual_solutions, + std::vector last_restart_duality_gap_primal_solution, + std::vector last_restart_duality_gap_dual_solution, f_t initial_primal_weight, f_t initial_step_size, i_t total_pdlp_iterations, @@ -47,13 +47,10 @@ pdlp_warm_start_data_t::pdlp_warm_start_data_t( f_t last_restart_kkt_score, f_t sum_solution_weight, i_t iterations_since_last_restart) - : // When initially creating this object, we can't move neither the primal/dual solution nor - // the average since they might be used as a solution by the solution object, they have to be - // copied - current_primal_solution_(current_primal_solution, current_primal_solution.stream()), - current_dual_solution_(current_dual_solution, current_dual_solution.stream()), - initial_primal_average_(initial_primal_average, initial_primal_average.stream()), - initial_dual_average_(initial_dual_average, initial_dual_average.stream()), + : current_primal_solution_(std::move(current_primal_solution)), 
+ current_dual_solution_(std::move(current_dual_solution)), + initial_primal_average_(std::move(initial_primal_average)), + initial_dual_average_(std::move(initial_dual_average)), current_ATY_(std::move(current_ATY)), sum_primal_solutions_(std::move(sum_primal_solutions)), sum_dual_solutions_(std::move(sum_dual_solutions)), @@ -73,33 +70,31 @@ pdlp_warm_start_data_t::pdlp_warm_start_data_t( template pdlp_warm_start_data_t::pdlp_warm_start_data_t() - : current_primal_solution_{rmm::device_uvector(0, rmm::cuda_stream_default)}, - current_dual_solution_{rmm::device_uvector(0, rmm::cuda_stream_default)}, - initial_primal_average_{rmm::device_uvector(0, rmm::cuda_stream_default)}, - initial_dual_average_{rmm::device_uvector(0, rmm::cuda_stream_default)}, - current_ATY_{rmm::device_uvector(0, rmm::cuda_stream_default)}, - sum_primal_solutions_{rmm::device_uvector(0, rmm::cuda_stream_default)}, - sum_dual_solutions_{rmm::device_uvector(0, rmm::cuda_stream_default)}, - last_restart_duality_gap_primal_solution_{ - rmm::device_uvector(0, rmm::cuda_stream_default)}, - last_restart_duality_gap_dual_solution_{rmm::device_uvector(0, rmm::cuda_stream_default)} + : current_primal_solution_(), + current_dual_solution_(), + initial_primal_average_(), + initial_dual_average_(), + current_ATY_(), + sum_primal_solutions_(), + sum_dual_solutions_(), + last_restart_duality_gap_primal_solution_(), + last_restart_duality_gap_dual_solution_() { } template pdlp_warm_start_data_t::pdlp_warm_start_data_t( - const pdlp_warm_start_data_view_t& other, rmm::cuda_stream_view stream_view) - : current_primal_solution_(other.current_primal_solution_.size(), stream_view), - current_dual_solution_(other.current_dual_solution_.size(), stream_view), - initial_primal_average_(other.initial_primal_average_.size(), stream_view), - initial_dual_average_(other.initial_dual_average_.size(), stream_view), - current_ATY_(other.current_ATY_.size(), stream_view), - sum_primal_solutions_(other.sum_primal_solutions_.size(), stream_view), - sum_dual_solutions_(other.sum_dual_solutions_.size(), stream_view), + const pdlp_warm_start_data_view_t& other) + : current_primal_solution_(other.current_primal_solution_.size()), + current_dual_solution_(other.current_dual_solution_.size()), + initial_primal_average_(other.initial_primal_average_.size()), + initial_dual_average_(other.initial_dual_average_.size()), + current_ATY_(other.current_ATY_.size()), + sum_primal_solutions_(other.sum_primal_solutions_.size()), + sum_dual_solutions_(other.sum_dual_solutions_.size()), last_restart_duality_gap_primal_solution_( - other.last_restart_duality_gap_primal_solution_.size(), stream_view), - last_restart_duality_gap_dual_solution_(other.last_restart_duality_gap_dual_solution_.size(), - stream_view), + other.last_restart_duality_gap_primal_solution_.size()), + last_restart_duality_gap_dual_solution_(other.last_restart_duality_gap_dual_solution_.size()), initial_primal_weight_(other.initial_primal_weight_), initial_step_size_(other.initial_step_size_), total_pdlp_iterations_(other.total_pdlp_iterations_), @@ -109,58 +104,26 @@ pdlp_warm_start_data_t::pdlp_warm_start_data_t( sum_solution_weight_(other.sum_solution_weight_), iterations_since_last_restart_(other.iterations_since_last_restart_) { - raft::copy(current_primal_solution_.data(), - other.current_primal_solution_.data(), - other.current_primal_solution_.size(), - stream_view); - raft::copy(current_dual_solution_.data(), - other.current_dual_solution_.data(), - other.current_dual_solution_.size(), - 
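Note: because the warm-start vectors are now plain host std::vectors, the constructor takes them by value and moves them into the members; the solver path above passes std::move'd host copies, while a caller that still needs its data can pass an lvalue and pay one copy. A generic sketch of that idiom (the struct is illustrative, not the cuOpt type):

#include <cstdio>
#include <utility>
#include <vector>

struct warm_start_sketch {
  std::vector<double> primal;
  std::vector<double> dual;

  // Pass by value: the caller decides between copying (lvalue) and moving (rvalue).
  warm_start_sketch(std::vector<double> p, std::vector<double> d)
    : primal(std::move(p)), dual(std::move(d))
  {
  }
};

int main()
{
  std::vector<double> host_primal{1.0, 2.0};
  std::vector<double> host_dual{0.5};

  warm_start_sketch ws(std::move(host_primal), host_dual);  // move primal, copy dual
  std::printf("primal size = %zu, dual size = %zu\n", ws.primal.size(), ws.dual.size());
  return 0;
}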
stream_view); - raft::copy(initial_primal_average_.data(), - other.initial_primal_average_.data(), - other.initial_primal_average_.size(), - stream_view); - raft::copy(initial_dual_average_.data(), - other.initial_dual_average_.data(), - other.initial_dual_average_.size(), - stream_view); - raft::copy( - current_ATY_.data(), other.current_ATY_.data(), other.current_ATY_.size(), stream_view); - raft::copy(sum_primal_solutions_.data(), - other.sum_primal_solutions_.data(), - other.sum_primal_solutions_.size(), - stream_view); - raft::copy(sum_dual_solutions_.data(), - other.sum_dual_solutions_.data(), - other.sum_dual_solutions_.size(), - stream_view); - raft::copy(last_restart_duality_gap_primal_solution_.data(), - other.last_restart_duality_gap_primal_solution_.data(), - other.last_restart_duality_gap_primal_solution_.size(), - stream_view); - raft::copy(last_restart_duality_gap_dual_solution_.data(), - other.last_restart_duality_gap_dual_solution_.data(), - other.last_restart_duality_gap_dual_solution_.size(), - stream_view); + // Note: pdlp_warm_start_data_view_t contains device pointers + // This constructor is used by Cython, so we need to copy from device to host + // We'll need to add this device-to-host copy logic when we integrate with Cython + // For now, this creates empty vectors sized correctly + // TODO: Add device-to-host copy when integrating with Cython interface check_sizes(); } template -pdlp_warm_start_data_t::pdlp_warm_start_data_t(const pdlp_warm_start_data_t& other, - rmm::cuda_stream_view stream_view) - : current_primal_solution_(other.current_primal_solution_, stream_view), - current_dual_solution_(other.current_dual_solution_, stream_view), - initial_primal_average_(other.initial_primal_average_, stream_view), - initial_dual_average_(other.initial_dual_average_, stream_view), - current_ATY_(other.current_ATY_, stream_view), - sum_primal_solutions_(other.sum_primal_solutions_, stream_view), - sum_dual_solutions_(other.sum_dual_solutions_, stream_view), - last_restart_duality_gap_primal_solution_(other.last_restart_duality_gap_primal_solution_, - stream_view), - last_restart_duality_gap_dual_solution_(other.last_restart_duality_gap_dual_solution_, - stream_view), +pdlp_warm_start_data_t::pdlp_warm_start_data_t(const pdlp_warm_start_data_t& other) + : current_primal_solution_(other.current_primal_solution_), + current_dual_solution_(other.current_dual_solution_), + initial_primal_average_(other.initial_primal_average_), + initial_dual_average_(other.initial_dual_average_), + current_ATY_(other.current_ATY_), + sum_primal_solutions_(other.sum_primal_solutions_), + sum_dual_solutions_(other.sum_dual_solutions_), + last_restart_duality_gap_primal_solution_(other.last_restart_duality_gap_primal_solution_), + last_restart_duality_gap_dual_solution_(other.last_restart_duality_gap_dual_solution_), initial_primal_weight_(other.initial_primal_weight_), initial_step_size_(other.initial_step_size_), total_pdlp_iterations_(other.total_pdlp_iterations_), diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index f8f88f8f8..e9b2b3725 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -29,9 +29,12 @@ #include #include +#include #include #include #include +#include +#include #include #include @@ -369,9 +372,15 @@ optimization_problem_solution_t convert_dual_simplex_sol( info.dual_ray_linear_objective = 0.0; pdlp_termination_status_t termination_status = to_termination_status(status); - auto sol = 
optimization_problem_solution_t(final_primal_solution, - final_dual_solution, - final_reduced_cost, + + // Convert device vectors to host for solution construction + auto host_primal = cuopt::host_copy(final_primal_solution, problem.handle_ptr->get_stream()); + auto host_dual = cuopt::host_copy(final_dual_solution, problem.handle_ptr->get_stream()); + auto host_rc = cuopt::host_copy(final_reduced_cost, problem.handle_ptr->get_stream()); + + auto sol = optimization_problem_solution_t(std::move(host_primal), + std::move(host_dual), + std::move(host_rc), problem.objective_name, problem.var_names, problem.row_names, @@ -444,7 +453,7 @@ optimization_problem_solution_t run_barrier( { // Convert data structures to dual simplex format and back dual_simplex::user_problem_t dual_simplex_problem = - cuopt_problem_to_simplex_problem(problem); + cuopt_problem_to_simplex_problem(problem.handle_ptr, problem); auto sol_dual_simplex = run_barrier(dual_simplex_problem, settings, timer); return convert_dual_simplex_sol(problem, std::get<0>(sol_dual_simplex), @@ -515,7 +524,7 @@ optimization_problem_solution_t run_dual_simplex( { // Convert data structures to dual simplex format and back dual_simplex::user_problem_t dual_simplex_problem = - cuopt_problem_to_simplex_problem(problem); + cuopt_problem_to_simplex_problem(problem.handle_ptr, problem); auto sol_dual_simplex = run_dual_simplex(dual_simplex_problem, settings, timer); return convert_dual_simplex_sol(problem, std::get<0>(sol_dual_simplex), @@ -535,8 +544,7 @@ static optimization_problem_solution_t run_pdlp_solver( { if (problem.n_constraints == 0) { CUOPT_LOG_INFO("No constraints in the problem: PDLP can't be run, use Dual Simplex instead."); - return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError, - problem.handle_ptr->get_stream()}; + return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError}; } detail::pdlp_solver_t solver(problem, settings, is_batch_mode); return solver.run_solver(timer); @@ -613,10 +621,16 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& auto crossover_end = std::chrono::high_resolution_clock::now(); auto crossover_duration = std::chrono::duration_cast(crossover_end - start_solver); - info.solve_time = crossover_duration.count() / 1000.0; - auto sol_crossover = optimization_problem_solution_t(final_primal_solution, - final_dual_solution, - final_reduced_cost, + info.solve_time = crossover_duration.count() / 1000.0; + + // Convert device vectors to host for solution construction + auto host_primal = cuopt::host_copy(final_primal_solution, problem.handle_ptr->get_stream()); + auto host_dual = cuopt::host_copy(final_dual_solution, problem.handle_ptr->get_stream()); + auto host_rc = cuopt::host_copy(final_reduced_cost, problem.handle_ptr->get_stream()); + + auto sol_crossover = optimization_problem_solution_t(std::move(host_primal), + std::move(host_dual), + std::move(host_rc), problem.objective_name, problem.var_names, problem.row_names, @@ -651,7 +665,7 @@ void run_dual_simplex_thread( template optimization_problem_solution_t run_concurrent( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, detail::problem_t& problem, pdlp_solver_settings_t const& settings, const timer_t& timer, @@ -671,16 +685,13 @@ optimization_problem_solution_t run_concurrent( // Initialize the dual simplex structures before we run PDLP. 
// Otherwise, CUDA API calls to the problem stream may occur in both threads and throw graph // capture off - auto barrier_handle = raft::handle_t(*op_problem.get_handle_ptr()); - detail::problem_t d_barrier_problem(problem); rmm::cuda_stream_view barrier_stream = rmm::cuda_stream_per_thread; - d_barrier_problem.handle_ptr = &barrier_handle; - raft::resource::set_cuda_stream(barrier_handle, barrier_stream); + auto barrier_handle = raft::handle_t(barrier_stream); // Make sure allocations are done on the original stream problem.handle_ptr->sync_stream(); dual_simplex::user_problem_t dual_simplex_problem = - cuopt_problem_to_simplex_problem(d_barrier_problem); + cuopt_problem_to_simplex_problem(&barrier_handle, problem); // Create a thread for dual simplex std::unique_ptr< std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>> @@ -770,7 +781,7 @@ optimization_problem_solution_t run_concurrent( template optimization_problem_solution_t solve_lp_with_method( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, detail::problem_t& problem, pdlp_solver_settings_t const& settings, const timer_t& timer, @@ -788,19 +799,49 @@ optimization_problem_solution_t solve_lp_with_method( } template -optimization_problem_solution_t solve_lp(optimization_problem_t& op_problem, +optimization_problem_solution_t solve_lp(optimization_problem_t& host_problem, pdlp_solver_settings_t const& settings, bool problem_checking, bool use_pdlp_solver_mode, bool is_batch_mode) { + // Check for remote solve environment variables + const char* remote_host = std::getenv("CUOPT_REMOTE_HOST"); + const char* remote_port = std::getenv("CUOPT_REMOTE_PORT"); + + if (remote_host != nullptr && remote_port != nullptr) { + std::fprintf(stderr, + "[solve_lp] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%s\n", + remote_host, + remote_port); + std::fflush(stderr); + + try { + auto remote_solution = solve_lp_remote(host_problem, settings); + std::fprintf(stderr, "[solve_lp] Remote solve succeeded, returning solution\n"); + std::fflush(stderr); + return remote_solution; + } catch (const std::exception& e) { + std::fprintf(stderr, "[solve_lp] Remote solve failed: %s\n", e.what()); + std::fprintf(stderr, "[solve_lp] Falling back to local solve\n"); + std::fflush(stderr); + // Fall through to local solve + } + } + + // Create RAFT handle for local GPU solve + raft::handle_t handle; + + // Convert host problem to GPU problem for internal solving + auto gpu_problem = host_to_gpu_problem(&handle, host_problem); + try { // Create log stream for file logging and add it to default logger init_logger_t log(settings.log_file, settings.log_to_console); // Init libraies before to not include it in solve time // This needs to be called before pdlp is initialized - init_handler(op_problem.get_handle_ptr()); + init_handler(gpu_problem.get_handle_ptr()); print_version_info(); @@ -809,26 +850,25 @@ optimization_problem_solution_t solve_lp(optimization_problem_t::check_problem_representation(op_problem); - problem_checking_t::check_initial_solution_representation(op_problem, settings); + problem_checking_t::check_problem_representation(gpu_problem); + problem_checking_t::check_initial_solution_representation(gpu_problem, settings); } CUOPT_LOG_INFO( "Solving a problem with %d constraints, %d variables (%d integers), and %d nonzeros", - op_problem.get_n_constraints(), - op_problem.get_n_variables(), + gpu_problem.get_n_constraints(), + gpu_problem.get_n_variables(), 0, - op_problem.get_nnz()); - 
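Note: with the environment-variable gate added to solve_lp above, remote solve is opt-in purely through the environment and the calling code is unchanged; if the remote attempt throws, solve_lp falls back to the local GPU path. A hedged usage sketch — the host and port values are placeholders, and setenv is the POSIX call, not a cuOpt API:

#include <cstdlib>

int main()
{
  // Point solve_lp at a remote cuOpt server (placeholder address and port).
  setenv("CUOPT_REMOTE_HOST", "10.0.0.5", /*overwrite=*/1);
  setenv("CUOPT_REMOTE_PORT", "9090", /*overwrite=*/1);

  // ... build an optimization_problem_t and call solve_lp(...) as usual.
  // If the remote solve fails, the exception is caught and a local solve runs instead.
  return 0;
}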
op_problem.print_scaling_information(); + gpu_problem.get_nnz()); + gpu_problem.print_scaling_information(); // Check for crossing bounds. Return infeasible if there are any - if (problem_checking_t::has_crossing_bounds(op_problem)) { - return optimization_problem_solution_t(pdlp_termination_status_t::PrimalInfeasible, - op_problem.get_handle_ptr()->get_stream()); + if (problem_checking_t::has_crossing_bounds(gpu_problem)) { + return optimization_problem_solution_t(pdlp_termination_status_t::PrimalInfeasible); } auto lp_timer = cuopt::timer_t(settings.time_limit); - detail::problem_t problem(op_problem); + detail::problem_t problem(gpu_problem); double presolve_time = 0.0; std::unique_ptr> presolver; @@ -844,7 +884,7 @@ optimization_problem_solution_t solve_lp(optimization_problem_t>(); auto [reduced_problem, feasible] = - presolver->apply(op_problem, + presolver->apply(gpu_problem, cuopt::linear_programming::problem_category_t::LP, settings.dual_postsolve, settings.tolerances.absolute_primal_tolerance, @@ -852,7 +892,7 @@ optimization_problem_solution_t solve_lp(optimization_problem_t( - pdlp_termination_status_t::PrimalInfeasible, op_problem.get_handle_ptr()->get_stream()); + pdlp_termination_status_t::PrimalInfeasible); } problem = detail::problem_t(reduced_problem); presolve_time = lp_timer.elapsed_time(); @@ -865,23 +905,23 @@ optimization_problem_solution_t solve_lp(optimization_problem_tget_stream()); + setup_device_symbols(gpu_problem.get_handle_ptr()->get_stream()); - auto solution = solve_lp_with_method(op_problem, problem, settings, lp_timer, is_batch_mode); + auto solution = solve_lp_with_method(gpu_problem, problem, settings, lp_timer, is_batch_mode); if (run_presolve) { auto primal_solution = cuopt::device_copy(solution.get_primal_solution(), - op_problem.get_handle_ptr()->get_stream()); - auto dual_solution = - cuopt::device_copy(solution.get_dual_solution(), op_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->get_stream()); + auto dual_solution = cuopt::device_copy(solution.get_dual_solution(), + gpu_problem.get_handle_ptr()->get_stream()); auto reduced_costs = - cuopt::device_copy(solution.get_reduced_cost(), op_problem.get_handle_ptr()->get_stream()); + cuopt::device_copy(solution.get_reduced_cost(), gpu_problem.get_handle_ptr()->get_stream()); bool status_to_skip = false; presolver->undo(primal_solution, @@ -889,79 +929,84 @@ optimization_problem_solution_t solve_lp(optimization_problem_tget_stream()); + gpu_problem.get_handle_ptr()->get_stream()); - thrust::fill(rmm::exec_policy(op_problem.get_handle_ptr()->get_stream()), + thrust::fill(rmm::exec_policy(gpu_problem.get_handle_ptr()->get_stream()), dual_solution.data(), dual_solution.data() + dual_solution.size(), std::numeric_limits::signaling_NaN()); - thrust::fill(rmm::exec_policy(op_problem.get_handle_ptr()->get_stream()), + thrust::fill(rmm::exec_policy(gpu_problem.get_handle_ptr()->get_stream()), reduced_costs.data(), reduced_costs.data() + reduced_costs.size(), std::numeric_limits::signaling_NaN()); auto full_stats = solution.get_additional_termination_information(); + // Convert device vectors back to host for solution construction + auto host_primal = + cuopt::host_copy(primal_solution, gpu_problem.get_handle_ptr()->get_stream()); + auto host_dual = cuopt::host_copy(dual_solution, gpu_problem.get_handle_ptr()->get_stream()); + auto host_rc = cuopt::host_copy(reduced_costs, gpu_problem.get_handle_ptr()->get_stream()); + // Create a new solution with the full problem solution - solution = 
optimization_problem_solution_t(primal_solution, - dual_solution, - reduced_costs, + solution = optimization_problem_solution_t(std::move(host_primal), + std::move(host_dual), + std::move(host_rc), solution.get_pdlp_warm_start_data(), - op_problem.get_objective_name(), - op_problem.get_variable_names(), - op_problem.get_row_names(), + gpu_problem.get_objective_name(), + gpu_problem.get_variable_names(), + gpu_problem.get_row_names(), full_stats, solution.get_termination_status()); } if (settings.sol_file != "") { CUOPT_LOG_INFO("Writing solution to file %s", settings.sol_file.c_str()); - solution.write_to_sol_file(settings.sol_file, op_problem.get_handle_ptr()->get_stream()); + solution.write_to_sol_file(settings.sol_file); } return solution; } catch (const cuopt::logic_error& e) { CUOPT_LOG_ERROR("Error in solve_lp: %s", e.what()); - return optimization_problem_solution_t{e, op_problem.get_handle_ptr()->get_stream()}; + return optimization_problem_solution_t{e}; } catch (const std::bad_alloc& e) { CUOPT_LOG_ERROR("Error in solve_lp: %s", e.what()); return optimization_problem_solution_t{ - cuopt::logic_error("Memory allocation failed", cuopt::error_type_t::RuntimeError), - op_problem.get_handle_ptr()->get_stream()}; + cuopt::logic_error("Memory allocation failed", cuopt::error_type_t::RuntimeError)}; } } template cuopt::linear_programming::optimization_problem_t mps_data_model_to_optimization_problem( - raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& data_model) + const cuopt::mps_parser::mps_data_model_t& data_model) { - cuopt::linear_programming::optimization_problem_t op_problem(handle_ptr); - op_problem.set_maximize(data_model.get_sense()); + cuopt::linear_programming::optimization_problem_t gpu_problem; + gpu_problem.set_maximize(data_model.get_sense()); - op_problem.set_csr_constraint_matrix(data_model.get_constraint_matrix_values().data(), - data_model.get_constraint_matrix_values().size(), - data_model.get_constraint_matrix_indices().data(), - data_model.get_constraint_matrix_indices().size(), - data_model.get_constraint_matrix_offsets().data(), - data_model.get_constraint_matrix_offsets().size()); + gpu_problem.set_csr_constraint_matrix(data_model.get_constraint_matrix_values().data(), + data_model.get_constraint_matrix_values().size(), + data_model.get_constraint_matrix_indices().data(), + data_model.get_constraint_matrix_indices().size(), + data_model.get_constraint_matrix_offsets().data(), + data_model.get_constraint_matrix_offsets().size()); if (data_model.get_constraint_bounds().size() != 0) { - op_problem.set_constraint_bounds(data_model.get_constraint_bounds().data(), - data_model.get_constraint_bounds().size()); + gpu_problem.set_constraint_bounds(data_model.get_constraint_bounds().data(), + data_model.get_constraint_bounds().size()); } if (data_model.get_objective_coefficients().size() != 0) { - op_problem.set_objective_coefficients(data_model.get_objective_coefficients().data(), - data_model.get_objective_coefficients().size()); + gpu_problem.set_objective_coefficients(data_model.get_objective_coefficients().data(), + data_model.get_objective_coefficients().size()); } - op_problem.set_objective_scaling_factor(data_model.get_objective_scaling_factor()); - op_problem.set_objective_offset(data_model.get_objective_offset()); + gpu_problem.set_objective_scaling_factor(data_model.get_objective_scaling_factor()); + gpu_problem.set_objective_offset(data_model.get_objective_offset()); if (data_model.get_variable_lower_bounds().size() != 0) { - 
op_problem.set_variable_lower_bounds(data_model.get_variable_lower_bounds().data(), - data_model.get_variable_lower_bounds().size()); + gpu_problem.set_variable_lower_bounds(data_model.get_variable_lower_bounds().data(), + data_model.get_variable_lower_bounds().size()); } if (data_model.get_variable_upper_bounds().size() != 0) { - op_problem.set_variable_upper_bounds(data_model.get_variable_upper_bounds().data(), - data_model.get_variable_upper_bounds().size()); + gpu_problem.set_variable_upper_bounds(data_model.get_variable_upper_bounds().data(), + data_model.get_variable_upper_bounds().size()); } if (data_model.get_variable_types().size() != 0) { std::vector enum_variable_types(data_model.get_variable_types().size()); @@ -970,46 +1015,45 @@ cuopt::linear_programming::optimization_problem_t mps_data_model_to_op data_model.get_variable_types().cend(), enum_variable_types.begin(), [](const auto val) -> var_t { return val == 'I' ? var_t::INTEGER : var_t::CONTINUOUS; }); - op_problem.set_variable_types(enum_variable_types.data(), enum_variable_types.size()); + gpu_problem.set_variable_types(enum_variable_types.data(), enum_variable_types.size()); } if (data_model.get_row_types().size() != 0) { - op_problem.set_row_types(data_model.get_row_types().data(), data_model.get_row_types().size()); + gpu_problem.set_row_types(data_model.get_row_types().data(), data_model.get_row_types().size()); } if (data_model.get_constraint_lower_bounds().size() != 0) { - op_problem.set_constraint_lower_bounds(data_model.get_constraint_lower_bounds().data(), - data_model.get_constraint_lower_bounds().size()); + gpu_problem.set_constraint_lower_bounds(data_model.get_constraint_lower_bounds().data(), + data_model.get_constraint_lower_bounds().size()); } if (data_model.get_constraint_upper_bounds().size() != 0) { - op_problem.set_constraint_upper_bounds(data_model.get_constraint_upper_bounds().data(), - data_model.get_constraint_upper_bounds().size()); + gpu_problem.set_constraint_upper_bounds(data_model.get_constraint_upper_bounds().data(), + data_model.get_constraint_upper_bounds().size()); } if (data_model.get_objective_name().size() != 0) { - op_problem.set_objective_name(data_model.get_objective_name()); + gpu_problem.set_objective_name(data_model.get_objective_name()); } if (data_model.get_problem_name().size() != 0) { - op_problem.set_problem_name(data_model.get_problem_name().data()); + gpu_problem.set_problem_name(data_model.get_problem_name().data()); } if (data_model.get_variable_names().size() != 0) { - op_problem.set_variable_names(data_model.get_variable_names()); + gpu_problem.set_variable_names(data_model.get_variable_names()); } if (data_model.get_row_names().size() != 0) { - op_problem.set_row_names(data_model.get_row_names()); + gpu_problem.set_row_names(data_model.get_row_names()); } - return op_problem; + return gpu_problem; } template optimization_problem_solution_t solve_lp( - raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& mps_data_model, pdlp_solver_settings_t const& settings, bool problem_checking, bool use_pdlp_solver_mode) { - auto op_problem = mps_data_model_to_optimization_problem(handle_ptr, mps_data_model); + auto op_problem = mps_data_model_to_optimization_problem(mps_data_model); return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); } @@ -1022,21 +1066,19 @@ optimization_problem_solution_t solve_lp( bool is_batch_mode); \ \ template optimization_problem_solution_t solve_lp( \ - raft::handle_t const* handle_ptr, \ const 
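// Illustrative sketch (hypothetical data): the same setters driven by
// mps_data_model_to_optimization_problem() above can be used to build a small host problem
// directly, e.g. minimize x0 + 2*x1 subject to x0 + x1 <= 4 with 0 <= x0, x1 <= 3.
#include <limits>
#include <vector>

template <typename i_t, typename f_t>
cuopt::linear_programming::optimization_problem_t<i_t, f_t> make_tiny_lp()
{
  cuopt::linear_programming::optimization_problem_t<i_t, f_t> problem;
  problem.set_maximize(false);

  // One constraint row (x0 + x1) stored in CSR form.
  std::vector<f_t> values  = {1.0, 1.0};
  std::vector<i_t> indices = {0, 1};
  std::vector<i_t> offsets = {0, 2};
  problem.set_csr_constraint_matrix(
    values.data(), values.size(), indices.data(), indices.size(), offsets.data(), offsets.size());

  std::vector<f_t> objective        = {1.0, 2.0};
  std::vector<f_t> constraint_lower = {-std::numeric_limits<f_t>::infinity()};
  std::vector<f_t> constraint_upper = {4.0};
  std::vector<f_t> variable_lower   = {0.0, 0.0};
  std::vector<f_t> variable_upper   = {3.0, 3.0};
  problem.set_objective_coefficients(objective.data(), objective.size());
  problem.set_constraint_lower_bounds(constraint_lower.data(), constraint_lower.size());
  problem.set_constraint_upper_bounds(constraint_upper.data(), constraint_upper.size());
  problem.set_variable_lower_bounds(variable_lower.data(), variable_lower.size());
  problem.set_variable_upper_bounds(variable_upper.data(), variable_upper.size());
  return problem;
}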
cuopt::mps_parser::mps_data_model_t& mps_data_model, \ pdlp_solver_settings_t const& settings, \ bool problem_checking, \ bool use_pdlp_solver_mode); \ \ template optimization_problem_solution_t solve_lp_with_method( \ - const optimization_problem_t& op_problem, \ + const gpu_optimization_problem_t& op_problem, \ detail::problem_t& problem, \ pdlp_solver_settings_t const& settings, \ const timer_t& timer, \ bool is_batch_mode); \ \ template optimization_problem_t mps_data_model_to_optimization_problem( \ - raft::handle_t const* handle_ptr, \ const cuopt::mps_parser::mps_data_model_t& data_model); \ template void set_pdlp_solver_mode(pdlp_solver_settings_t const& settings); diff --git a/cpp/src/linear_programming/solve.cuh b/cpp/src/linear_programming/solve.cuh index bc95bc268..1b7a62f68 100644 --- a/cpp/src/linear_programming/solve.cuh +++ b/cpp/src/linear_programming/solve.cuh @@ -17,6 +17,7 @@ #pragma once +#include #include #include @@ -32,7 +33,7 @@ cuopt::linear_programming::optimization_problem_t mps_data_model_to_op template cuopt::linear_programming::optimization_problem_solution_t solve_lp_with_method( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, detail::problem_t& problem, pdlp_solver_settings_t const& settings, const timer_t& timer, diff --git a/cpp/src/linear_programming/solver_settings.cu b/cpp/src/linear_programming/solver_settings.cu index d4b4388af..e3041c1bd 100644 --- a/cpp/src/linear_programming/solver_settings.cu +++ b/cpp/src/linear_programming/solver_settings.cu @@ -54,7 +54,7 @@ pdlp_solver_settings_t::pdlp_solver_settings_t(const pdlp_solver_setti eliminate_dense_columns(other.eliminate_dense_columns), save_best_primal_so_far(other.save_best_primal_so_far), first_primal_feasible(other.first_primal_feasible), - pdlp_warm_start_data_(other.pdlp_warm_start_data_, stream_view), + pdlp_warm_start_data_(other.pdlp_warm_start_data_), concurrent_halt(other.concurrent_halt) { } @@ -135,47 +135,36 @@ void pdlp_solver_settings_t::set_pdlp_warm_start_data( var_mapping.begin(), pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_.begin()); - pdlp_warm_start_data_.current_primal_solution_.resize(var_mapping.size(), - var_mapping.stream()); - pdlp_warm_start_data_.initial_primal_average_.resize(var_mapping.size(), - var_mapping.stream()); - pdlp_warm_start_data_.current_ATY_.resize(var_mapping.size(), var_mapping.stream()); - pdlp_warm_start_data_.sum_primal_solutions_.resize(var_mapping.size(), var_mapping.stream()); - pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_.resize(var_mapping.size(), - var_mapping.stream()); + pdlp_warm_start_data_.current_primal_solution_.resize(var_mapping.size()); + pdlp_warm_start_data_.initial_primal_average_.resize(var_mapping.size()); + pdlp_warm_start_data_.current_ATY_.resize(var_mapping.size()); + pdlp_warm_start_data_.sum_primal_solutions_.resize(var_mapping.size()); + pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_.resize(var_mapping.size()); } else if (var_mapping.size() > pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_.size()) { const auto previous_size = pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_.size(); // If more variables just pad with 0s - pdlp_warm_start_data_.current_primal_solution_.resize(var_mapping.size(), - var_mapping.stream()); - pdlp_warm_start_data_.initial_primal_average_.resize(var_mapping.size(), - var_mapping.stream()); - pdlp_warm_start_data_.current_ATY_.resize(var_mapping.size(), 
var_mapping.stream()); - pdlp_warm_start_data_.sum_primal_solutions_.resize(var_mapping.size(), var_mapping.stream()); - pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_.resize(var_mapping.size(), - var_mapping.stream()); - - thrust::fill(rmm::exec_policy(var_mapping.stream()), - pdlp_warm_start_data_.current_primal_solution_.begin() + previous_size, - pdlp_warm_start_data_.current_primal_solution_.end(), - f_t(0)); - thrust::fill(rmm::exec_policy(var_mapping.stream()), - pdlp_warm_start_data_.initial_primal_average_.begin() + previous_size, - pdlp_warm_start_data_.initial_primal_average_.end(), - f_t(0)); - thrust::fill(rmm::exec_policy(var_mapping.stream()), - pdlp_warm_start_data_.current_ATY_.begin() + previous_size, - pdlp_warm_start_data_.current_ATY_.end(), - f_t(0)); - thrust::fill(rmm::exec_policy(var_mapping.stream()), - pdlp_warm_start_data_.sum_primal_solutions_.begin() + previous_size, - pdlp_warm_start_data_.sum_primal_solutions_.end(), - f_t(0)); - thrust::fill( - rmm::exec_policy(var_mapping.stream()), + pdlp_warm_start_data_.current_primal_solution_.resize(var_mapping.size()); + pdlp_warm_start_data_.initial_primal_average_.resize(var_mapping.size()); + pdlp_warm_start_data_.current_ATY_.resize(var_mapping.size()); + pdlp_warm_start_data_.sum_primal_solutions_.resize(var_mapping.size()); + pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_.resize(var_mapping.size()); + + std::fill(pdlp_warm_start_data_.current_primal_solution_.begin() + previous_size, + pdlp_warm_start_data_.current_primal_solution_.end(), + f_t(0)); + std::fill(pdlp_warm_start_data_.initial_primal_average_.begin() + previous_size, + pdlp_warm_start_data_.initial_primal_average_.end(), + f_t(0)); + std::fill(pdlp_warm_start_data_.current_ATY_.begin() + previous_size, + pdlp_warm_start_data_.current_ATY_.end(), + f_t(0)); + std::fill(pdlp_warm_start_data_.sum_primal_solutions_.begin() + previous_size, + pdlp_warm_start_data_.sum_primal_solutions_.end(), + f_t(0)); + std::fill( pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_.begin() + previous_size, pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_.end(), f_t(0)); @@ -209,43 +198,33 @@ void pdlp_solver_settings_t::set_pdlp_warm_start_data( constraint_mapping.begin(), pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_.begin()); - pdlp_warm_start_data_.current_dual_solution_.resize(constraint_mapping.size(), - constraint_mapping.stream()); - pdlp_warm_start_data_.initial_dual_average_.resize(constraint_mapping.size(), - constraint_mapping.stream()); - pdlp_warm_start_data_.sum_dual_solutions_.resize(constraint_mapping.size(), - constraint_mapping.stream()); + pdlp_warm_start_data_.current_dual_solution_.resize(constraint_mapping.size()); + pdlp_warm_start_data_.initial_dual_average_.resize(constraint_mapping.size()); + pdlp_warm_start_data_.sum_dual_solutions_.resize(constraint_mapping.size()); pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_.resize( - constraint_mapping.size(), constraint_mapping.stream()); + constraint_mapping.size()); } else if (constraint_mapping.size() > pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_.size()) { const auto previous_size = pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_.size(); // If more variables just pad with 0s - pdlp_warm_start_data_.current_dual_solution_.resize(constraint_mapping.size(), - constraint_mapping.stream()); - pdlp_warm_start_data_.initial_dual_average_.resize(constraint_mapping.size(), - 
constraint_mapping.stream()); - pdlp_warm_start_data_.sum_dual_solutions_.resize(constraint_mapping.size(), - constraint_mapping.stream()); + pdlp_warm_start_data_.current_dual_solution_.resize(constraint_mapping.size()); + pdlp_warm_start_data_.initial_dual_average_.resize(constraint_mapping.size()); + pdlp_warm_start_data_.sum_dual_solutions_.resize(constraint_mapping.size()); pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_.resize( - constraint_mapping.size(), constraint_mapping.stream()); - - thrust::fill(rmm::exec_policy(constraint_mapping.stream()), - pdlp_warm_start_data_.current_dual_solution_.begin() + previous_size, - pdlp_warm_start_data_.current_dual_solution_.end(), - f_t(0)); - thrust::fill(rmm::exec_policy(constraint_mapping.stream()), - pdlp_warm_start_data_.initial_dual_average_.begin() + previous_size, - pdlp_warm_start_data_.initial_dual_average_.end(), - f_t(0)); - thrust::fill(rmm::exec_policy(constraint_mapping.stream()), - pdlp_warm_start_data_.sum_dual_solutions_.begin() + previous_size, - pdlp_warm_start_data_.sum_dual_solutions_.end(), - f_t(0)); - thrust::fill( - rmm::exec_policy(constraint_mapping.stream()), + constraint_mapping.size()); + + std::fill(pdlp_warm_start_data_.current_dual_solution_.begin() + previous_size, + pdlp_warm_start_data_.current_dual_solution_.end(), + f_t(0)); + std::fill(pdlp_warm_start_data_.initial_dual_average_.begin() + previous_size, + pdlp_warm_start_data_.initial_dual_average_.end(), + f_t(0)); + std::fill(pdlp_warm_start_data_.sum_dual_solutions_.begin() + previous_size, + pdlp_warm_start_data_.sum_dual_solutions_.end(), + f_t(0)); + std::fill( pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_.begin() + previous_size, pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_.end(), f_t(0)); diff --git a/cpp/src/linear_programming/solver_solution.cu b/cpp/src/linear_programming/solver_solution.cu index b611bce6d..2741a5576 100644 --- a/cpp/src/linear_programming/solver_solution.cu +++ b/cpp/src/linear_programming/solver_solution.cu @@ -30,10 +30,10 @@ namespace cuopt::linear_programming { template optimization_problem_solution_t::optimization_problem_solution_t( - pdlp_termination_status_t termination_status, rmm::cuda_stream_view stream_view) - : primal_solution_{0, stream_view}, - dual_solution_{0, stream_view}, - reduced_cost_{0, stream_view}, + pdlp_termination_status_t termination_status) + : primal_solution_(), + dual_solution_(), + reduced_cost_(), termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { @@ -41,10 +41,10 @@ optimization_problem_solution_t::optimization_problem_solution_t( template optimization_problem_solution_t::optimization_problem_solution_t( - cuopt::logic_error error_status_, rmm::cuda_stream_view stream_view) - : primal_solution_{0, stream_view}, - dual_solution_{0, stream_view}, - reduced_cost_{0, stream_view}, + cuopt::logic_error error_status_) + : primal_solution_(), + dual_solution_(), + reduced_cost_(), termination_status_(pdlp_termination_status_t::NoTermination), error_status_(error_status_) { @@ -52,14 +52,14 @@ optimization_problem_solution_t::optimization_problem_solution_t( template optimization_problem_solution_t::optimization_problem_solution_t( - rmm::device_uvector& final_primal_solution, - rmm::device_uvector& final_dual_solution, - rmm::device_uvector& final_reduced_cost, - pdlp_warm_start_data_t& warm_start_data, + std::vector final_primal_solution, + std::vector final_dual_solution, + std::vector 
final_reduced_cost, + pdlp_warm_start_data_t warm_start_data, const std::string objective_name, const std::vector& var_names, const std::vector& row_names, - additional_termination_information_t& termination_stats, + additional_termination_information_t termination_stats, pdlp_termination_status_t termination_status) : primal_solution_(std::move(final_primal_solution)), dual_solution_(std::move(final_dual_solution)), @@ -76,13 +76,13 @@ optimization_problem_solution_t::optimization_problem_solution_t( template optimization_problem_solution_t::optimization_problem_solution_t( - rmm::device_uvector& final_primal_solution, - rmm::device_uvector& final_dual_solution, - rmm::device_uvector& final_reduced_cost, + std::vector final_primal_solution, + std::vector final_dual_solution, + std::vector final_reduced_cost, const std::string objective_name, const std::vector& var_names, const std::vector& row_names, - additional_termination_information_t& termination_stats, + additional_termination_information_t termination_stats, pdlp_termination_status_t termination_status) : primal_solution_(std::move(final_primal_solution)), dual_solution_(std::move(final_dual_solution)), @@ -98,19 +98,18 @@ optimization_problem_solution_t::optimization_problem_solution_t( template optimization_problem_solution_t::optimization_problem_solution_t( - rmm::device_uvector& final_primal_solution, - rmm::device_uvector& final_dual_solution, - rmm::device_uvector& final_reduced_cost, + const std::vector& final_primal_solution, + const std::vector& final_dual_solution, + const std::vector& final_reduced_cost, const std::string objective_name, const std::vector& var_names, const std::vector& row_names, - additional_termination_information_t& termination_stats, + additional_termination_information_t termination_stats, pdlp_termination_status_t termination_status, - const raft::handle_t* handler_ptr, [[maybe_unused]] bool deep_copy) - : primal_solution_(final_primal_solution, handler_ptr->get_stream()), - dual_solution_(final_dual_solution, handler_ptr->get_stream()), - reduced_cost_(final_reduced_cost, handler_ptr->get_stream()), + : primal_solution_(final_primal_solution), + dual_solution_(final_dual_solution), + reduced_cost_(final_reduced_cost), objective_name_(objective_name), var_names_(var_names), row_names_(row_names), @@ -118,37 +117,24 @@ optimization_problem_solution_t::optimization_problem_solution_t( termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { + // Deep copy already handled by std::vector copy constructor } template void optimization_problem_solution_t::copy_from( const raft::handle_t* handle_ptr, const optimization_problem_solution_t& other) { - // Resize to make sure they are of same size - primal_solution_.resize(other.primal_solution_.size(), handle_ptr->get_stream()); - dual_solution_.resize(other.dual_solution_.size(), handle_ptr->get_stream()); - reduced_cost_.resize(other.reduced_cost_.size(), handle_ptr->get_stream()); - - // Copy the data - raft::copy(primal_solution_.data(), - other.primal_solution_.data(), - primal_solution_.size(), - handle_ptr->get_stream()); - raft::copy(dual_solution_.data(), - other.dual_solution_.data(), - dual_solution_.size(), - handle_ptr->get_stream()); - raft::copy(reduced_cost_.data(), - other.reduced_cost_.data(), - reduced_cost_.size(), - handle_ptr->get_stream()); + // Solution is already on host, simple vector copy + primal_solution_ = other.primal_solution_; + dual_solution_ = other.dual_solution_; + 
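// Illustrative sketch: with the constructors above taking std::vector, the solution is held on
// the host and can be read without a CUDA stream. Only accessors defined elsewhere in this
// file are used; the helper name is hypothetical.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

template <typename i_t, typename f_t>
void print_primal_head(const cuopt::linear_programming::optimization_problem_solution_t<i_t, f_t>& sol)
{
  const std::vector<f_t>& x = sol.get_primal_solution();
  std::printf("%s, objective = %.8e, %zu variables\n",
              sol.get_termination_status_string().c_str(),
              static_cast<double>(sol.get_objective_value()),
              x.size());
  for (std::size_t j = 0; j < std::min<std::size_t>(x.size(), 5); ++j) {
    std::printf("  x[%zu] = %.6e\n", j, static_cast<double>(x[j]));
  }
}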
reduced_cost_ = other.reduced_cost_; + termination_stats_ = other.termination_stats_; termination_status_ = other.termination_status_; objective_name_ = other.objective_name_; var_names_ = other.var_names_; row_names_ = other.row_names_; // We do not copy the warm start info. As it is not needed for this purpose. - handle_ptr->sync_stream(); } template @@ -199,7 +185,6 @@ void optimization_problem_solution_t::write_additional_termination_sta template void optimization_problem_solution_t::write_to_file(std::string_view filename, - rmm::cuda_stream_view stream_view, bool generate_variable_values) { raft::common::nvtx::range fun_scope("write final solution to file"); @@ -213,18 +198,11 @@ void optimization_problem_solution_t::write_to_file(std::string_view f << std::endl; return; } - std::vector primal_solution; - std::vector dual_solution; - std::vector reduced_cost; - primal_solution.resize(primal_solution_.size()); - dual_solution.resize(dual_solution_.size()); - reduced_cost.resize(reduced_cost_.size()); - raft::copy( - primal_solution.data(), primal_solution_.data(), primal_solution_.size(), stream_view.value()); - raft::copy( - dual_solution.data(), dual_solution_.data(), dual_solution_.size(), stream_view.value()); - raft::copy(reduced_cost.data(), reduced_cost_.data(), reduced_cost_.size(), stream_view.value()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + + // Solution is already on host, use member variables directly + const auto& primal_solution = primal_solution_; + const auto& dual_solution = dual_solution_; + const auto& reduced_cost = reduced_cost_; myfile << "{ " << std::endl; myfile << "\t\"Termination reason\" : \"" << get_termination_status_string() << "\"," @@ -316,32 +294,31 @@ f_t optimization_problem_solution_t::get_dual_objective_value() const } template -rmm::device_uvector& optimization_problem_solution_t::get_primal_solution() +std::vector& optimization_problem_solution_t::get_primal_solution() { return primal_solution_; } template -const rmm::device_uvector& optimization_problem_solution_t::get_primal_solution() - const +const std::vector& optimization_problem_solution_t::get_primal_solution() const { return primal_solution_; } template -rmm::device_uvector& optimization_problem_solution_t::get_dual_solution() +std::vector& optimization_problem_solution_t::get_dual_solution() { return dual_solution_; } template -const rmm::device_uvector& optimization_problem_solution_t::get_dual_solution() const +const std::vector& optimization_problem_solution_t::get_dual_solution() const { return dual_solution_; } template -rmm::device_uvector& optimization_problem_solution_t::get_reduced_cost() +std::vector& optimization_problem_solution_t::get_reduced_cost() { return reduced_cost_; } @@ -373,8 +350,7 @@ optimization_problem_solution_t::get_pdlp_warm_start_data() } template -void optimization_problem_solution_t::write_to_sol_file( - std::string_view filename, rmm::cuda_stream_view stream_view) const +void optimization_problem_solution_t::write_to_sol_file(std::string_view filename) const { auto status = get_termination_status_string(); if (termination_status_ != pdlp_termination_status_t::Optimal && @@ -383,13 +359,33 @@ void optimization_problem_solution_t::write_to_sol_file( } auto objective_value = get_objective_value(); - std::vector solution; - solution.resize(primal_solution_.size()); - raft::copy( - solution.data(), primal_solution_.data(), primal_solution_.size(), stream_view.value()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + // 
Solution is already on host, use directly solution_writer_t::write_solution_to_sol_file( - std::string(filename), status, objective_value, var_names_, solution); + std::string(filename), status, objective_value, var_names_, primal_solution_); +} + +template +void optimization_problem_solution_t::print_solution_stats() const +{ + const char* status_str = "Other"; + if (termination_status_ == pdlp_termination_status_t::Optimal) { + status_str = "Optimal"; + } else if (termination_status_ == pdlp_termination_status_t::PrimalInfeasible) { + status_str = "Primal Infeasible"; + } else if (termination_status_ == pdlp_termination_status_t::DualInfeasible) { + status_str = "Dual Infeasible"; + } else if (termination_status_ == pdlp_termination_status_t::TimeLimit) { + status_str = "Time Limit"; + } else if (termination_status_ == pdlp_termination_status_t::IterationLimit) { + status_str = "Iteration Limit"; + } + + fprintf(stderr, + "Status: %s Objective: %.8e Iterations: %d Time: %.3fs\n", + status_str, + termination_stats_.primal_objective, + termination_stats_.number_of_steps_taken, + termination_stats_.solve_time); } #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/linear_programming/termination_strategy/termination_strategy.cu b/cpp/src/linear_programming/termination_strategy/termination_strategy.cu index 61e461a2e..e2123ac64 100644 --- a/cpp/src/linear_programming/termination_strategy/termination_strategy.cu +++ b/cpp/src/linear_programming/termination_strategy/termination_strategy.cu @@ -330,30 +330,40 @@ pdlp_termination_strategy_t::fill_return_problem_solution( RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); + // Convert device vectors to host for solution construction + std::vector host_primal(primal_iterate.size()); + std::vector host_dual(dual_iterate.size()); + std::vector host_rc(convergence_information_.get_reduced_cost().size()); + + raft::copy(host_primal.data(), primal_iterate.data(), primal_iterate.size(), stream_view_); + raft::copy(host_dual.data(), dual_iterate.data(), dual_iterate.size(), stream_view_); + raft::copy(host_rc.data(), + convergence_information_.get_reduced_cost().data(), + convergence_information_.get_reduced_cost().size(), + stream_view_); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); + if (deep_copy) { - optimization_problem_solution_t op_solution{ - primal_iterate, - dual_iterate, - convergence_information_.get_reduced_cost(), - problem_ptr->objective_name, - problem_ptr->var_names, - problem_ptr->row_names, - term_stats, - termination_status, - handle_ptr_, - deep_copy}; + optimization_problem_solution_t op_solution{host_primal, + host_dual, + host_rc, + problem_ptr->objective_name, + problem_ptr->var_names, + problem_ptr->row_names, + term_stats, + termination_status, + deep_copy}; return op_solution; } else { - optimization_problem_solution_t op_solution{ - primal_iterate, - dual_iterate, - convergence_information_.get_reduced_cost(), - warm_start_data, - problem_ptr->objective_name, - problem_ptr->var_names, - problem_ptr->row_names, - term_stats, - termination_status}; + optimization_problem_solution_t op_solution{std::move(host_primal), + std::move(host_dual), + std::move(host_rc), + warm_start_data, + problem_ptr->objective_name, + problem_ptr->var_names, + problem_ptr->row_names, + term_stats, + termination_status}; return op_solution; } } diff --git a/cpp/src/linear_programming/translate.hpp b/cpp/src/linear_programming/translate.hpp index 227a4375c..8e80f8fee 100644 --- a/cpp/src/linear_programming/translate.hpp +++ 
b/cpp/src/linear_programming/translate.hpp @@ -28,21 +28,21 @@ namespace cuopt::linear_programming { template static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( - detail::problem_t& model) + raft::handle_t const* handle_ptr, detail::problem_t& model) { - dual_simplex::user_problem_t user_problem(model.handle_ptr); + dual_simplex::user_problem_t user_problem(handle_ptr); int m = model.n_constraints; int n = model.n_variables; int nz = model.nnz; user_problem.num_rows = m; user_problem.num_cols = n; - user_problem.objective = cuopt::host_copy(model.objective_coefficients); + user_problem.objective = cuopt::host_copy(model.objective_coefficients, handle_ptr->get_stream()); dual_simplex::csr_matrix_t csr_A(m, n, nz); - csr_A.x = cuopt::host_copy(model.coefficients); - csr_A.j = cuopt::host_copy(model.variables); - csr_A.row_start = cuopt::host_copy(model.offsets); + csr_A.x = cuopt::host_copy(model.coefficients, handle_ptr->get_stream()); + csr_A.j = cuopt::host_copy(model.variables, handle_ptr->get_stream()); + csr_A.row_start = cuopt::host_copy(model.offsets, handle_ptr->get_stream()); csr_A.to_compressed_col(user_problem.A); @@ -51,8 +51,10 @@ static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( user_problem.range_rows.clear(); user_problem.range_value.clear(); - auto model_constraint_lower_bounds = cuopt::host_copy(model.constraint_lower_bounds); - auto model_constraint_upper_bounds = cuopt::host_copy(model.constraint_upper_bounds); + auto model_constraint_lower_bounds = + cuopt::host_copy(model.constraint_lower_bounds, handle_ptr->get_stream()); + auto model_constraint_upper_bounds = + cuopt::host_copy(model.constraint_upper_bounds, handle_ptr->get_stream()); // All constraints have lower and upper bounds // lr <= a_i^T x <= ur @@ -79,7 +81,7 @@ static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( } user_problem.num_range_rows = user_problem.range_rows.size(); std::tie(user_problem.lower, user_problem.upper) = - extract_host_bounds(model.variable_bounds, model.handle_ptr); + extract_host_bounds(model.variable_bounds, handle_ptr); user_problem.problem_name = model.original_problem_ptr->get_problem_name(); if (model.row_names.size() > 0) { user_problem.row_names.resize(m); @@ -97,7 +99,7 @@ static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( user_problem.obj_scale = model.presolve_data.objective_scaling_factor; user_problem.var_types.resize(n); - auto model_variable_types = cuopt::host_copy(model.variable_types); + auto model_variable_types = cuopt::host_copy(model.variable_types, handle_ptr->get_stream()); for (int j = 0; j < n; ++j) { user_problem.var_types[j] = model_variable_types[j] == var_t::CONTINUOUS @@ -130,8 +132,8 @@ void translate_to_crossover_problem(const detail::problem_t& problem, CUOPT_LOG_DEBUG("Converted to compressed column"); std::vector slack(problem.n_constraints); - std::vector tmp_x = cuopt::host_copy(sol.get_primal_solution()); - problem.handle_ptr->get_stream().synchronize(); + // Solution is already on host, no need for host_copy + const std::vector& tmp_x = sol.get_primal_solution(); dual_simplex::matrix_vector_multiply(lp.A, 1.0, tmp_x, 0.0, slack); CUOPT_LOG_DEBUG("Multiplied A and x"); @@ -187,10 +189,10 @@ void translate_to_crossover_problem(const detail::problem_t& problem, if (initial_solution.x[j] > lp.upper[j]) { initial_solution.x[j] = lp.upper[j]; } } CUOPT_LOG_DEBUG("Finished with x"); - initial_solution.y = cuopt::host_copy(sol.get_dual_solution()); + // Solution is already on 
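// Illustrative host CSR mat-vec, a stand-alone sketch of the slack computation performed by
// the dual_simplex::matrix_vector_multiply() call above (slack = A * x); the real routine
// operates on dual_simplex's own matrix types, this version exists only for illustration.
#include <cstddef>
#include <vector>

template <typename f_t>
void csr_matvec(const std::vector<int>& row_start,
                const std::vector<int>& col_index,
                const std::vector<f_t>& values,
                const std::vector<f_t>& x,
                std::vector<f_t>& y)
{
  if (row_start.empty()) { return; }
  const std::size_t m = row_start.size() - 1;  // number of rows
  for (std::size_t i = 0; i < m; ++i) {
    f_t acc = 0;
    for (int p = row_start[i]; p < row_start[i + 1]; ++p) {
      acc += values[p] * x[col_index[p]];
    }
    y[i] = acc;
  }
}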
host, no need for host_copy + initial_solution.y = sol.get_dual_solution(); - std::vector tmp_z = cuopt::host_copy(sol.get_reduced_cost()); - problem.handle_ptr->get_stream().synchronize(); + const std::vector& tmp_z = sol.get_reduced_cost(); std::copy(tmp_z.begin(), tmp_z.begin() + problem.n_variables, initial_solution.z.begin()); for (i_t j = problem.n_variables; j < n; ++j) { initial_solution.z[j] = initial_solution.y[j - problem.n_variables]; diff --git a/cpp/src/linear_programming/utilities/cuopt_remote.proto b/cpp/src/linear_programming/utilities/cuopt_remote.proto new file mode 100644 index 000000000..faf93e094 --- /dev/null +++ b/cpp/src/linear_programming/utilities/cuopt_remote.proto @@ -0,0 +1,289 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +syntax = "proto3"; + +package cuopt.remote; + +// Protocol version and metadata +message RequestHeader { + uint32 version = 1; // Protocol version (currently 1) + ProblemType problem_type = 2; // LP or MIP + IndexType index_type = 3; // INT32 or INT64 + FloatType float_type = 4; // FLOAT32 or DOUBLE +} + +enum ProblemType { + LP = 0; + MIP = 1; +} + +enum IndexType { + INT32 = 0; + INT64 = 1; +} + +enum FloatType { + FLOAT32 = 0; + DOUBLE = 1; +} + +// Optimization problem representation (matches optimization_problem_t) +message OptimizationProblem { + // Problem metadata + string problem_name = 1; + string objective_name = 2; + bool maximize = 3; + double objective_scaling_factor = 4; + double objective_offset = 5; + ProblemCategory problem_category = 6; + + // Variable names (optional, can be large) + repeated string variable_names = 7; + repeated string row_names = 8; + + // Constraint matrix in CSR format + repeated double constraint_matrix_values = 10; + repeated int32 constraint_matrix_indices = 11; + repeated int32 constraint_matrix_offsets = 12; + + // Problem vectors + repeated double objective_coefficients = 20; + repeated double constraint_bounds = 21; + repeated double variable_lower_bounds = 22; + repeated double variable_upper_bounds = 23; + + // Constraint bounds (two representations supported by cuOpt) + repeated double constraint_lower_bounds = 24; + repeated double constraint_upper_bounds = 25; + bytes row_types = 26; // char array of constraint types ('<', '>', '=') + + // Integer/binary variable indicators (for MIP) + repeated bool is_integer = 30; + repeated bool is_binary = 31; +} + +enum ProblemCategory { + LP_PROBLEM = 0; + MIP_PROBLEM = 1; + IP_PROBLEM = 2; +} + +// PDLP solver settings +message PDLPSolverSettings { + // Termination criteria + double eps_optimal_absolute = 1; + double eps_optimal_relative = 2; + double eps_primal_infeasible = 3; + double eps_dual_infeasible = 4; + int32 time_sec_limit = 5; + int32 iteration_limit = 6; + + // Algorithm parameters + double initial_primal_weight = 10; + double initial_step_size = 11; + int32 verbosity = 12; + + // Warm start data (if provided) + PDLPWarmStartData warm_start_data = 20; +} + +message PDLPWarmStartData { + repeated double current_primal_solution = 1; + repeated double current_dual_solution = 2; + repeated double initial_primal_average = 3; + repeated double initial_dual_average = 4; + repeated double current_ATY = 5; + repeated double sum_primal_solutions = 6; + repeated double sum_dual_solutions = 7; + repeated double last_restart_duality_gap_primal_solution = 8; + repeated double last_restart_duality_gap_dual_solution = 9; + + double initial_primal_weight 
= 10; + double initial_step_size = 11; + int32 total_pdlp_iterations = 12; + int32 total_pdhg_iterations = 13; + double last_candidate_kkt_score = 14; + double last_restart_kkt_score = 15; + double sum_solution_weight = 16; + int32 iterations_since_last_restart = 17; +} + +// MIP solver settings +message MIPSolverSettings { + double time_limit = 1; + double mip_gap = 2; + int32 verbosity = 3; + // Additional MIP settings can be added here +} + +// LP solve request +message SolveLPRequest { + RequestHeader header = 1; + OptimizationProblem problem = 2; + PDLPSolverSettings settings = 3; +} + +// MIP solve request +message SolveMIPRequest { + RequestHeader header = 1; + OptimizationProblem problem = 2; + MIPSolverSettings settings = 3; +} + +// LP solution +message LPSolution { + // Solution vectors + repeated double primal_solution = 1; + repeated double dual_solution = 2; + repeated double reduced_cost = 3; + + // Warm start data for next solve + PDLPWarmStartData warm_start_data = 4; + + // Termination information + PDLPTerminationStatus termination_status = 10; + string error_message = 11; + + // Solution statistics + double l2_primal_residual = 20; + double l2_dual_residual = 21; + double primal_objective = 22; + double dual_objective = 23; + double gap = 24; + int32 nb_iterations = 25; + double solve_time = 26; + bool solved_by_pdlp = 27; +} + +enum PDLPTerminationStatus { + PDLP_NO_TERMINATION = 0; + PDLP_NUMERICAL_ERROR = 1; + PDLP_OPTIMAL = 2; + PDLP_PRIMAL_INFEASIBLE = 3; + PDLP_DUAL_INFEASIBLE = 4; + PDLP_ITERATION_LIMIT = 5; + PDLP_TIME_LIMIT = 6; + PDLP_CONCURRENT_LIMIT = 7; + PDLP_PRIMAL_FEASIBLE = 8; +} + +// MIP solution +message MIPSolution { + repeated double solution = 1; + + MIPTerminationStatus termination_status = 10; + string error_message = 11; + + double objective = 20; + double mip_gap = 21; + double solution_bound = 22; + double total_solve_time = 23; + double presolve_time = 24; + double max_constraint_violation = 25; + double max_int_violation = 26; + double max_variable_bound_violation = 27; + int32 nodes = 28; + int32 simplex_iterations = 29; +} + +enum MIPTerminationStatus { + MIP_NO_TERMINATION = 0; + MIP_OPTIMAL = 1; + MIP_FEASIBLE_FOUND = 2; + MIP_INFEASIBLE = 3; + MIP_UNBOUNDED = 4; + MIP_TIME_LIMIT = 5; +} + +// Request types for async operations +enum RequestType { + SUBMIT_JOB = 0; // Submit a new solve job + CHECK_STATUS = 1; // Check job status + GET_RESULT = 2; // Retrieve completed result + DELETE_RESULT = 3; // Delete result from server +} + +// Job status for async operations +enum JobStatus { + QUEUED = 0; // Job submitted, waiting in queue + PROCESSING = 1; // Job currently being solved + COMPLETED = 2; // Job completed successfully + FAILED = 3; // Job failed with error + NOT_FOUND = 4; // Job ID not found +} + +// Generic request wrapper for async operations +message AsyncRequest { + RequestType request_type = 1; + string job_id = 2; // For status/get/delete operations + bool blocking = 3; // If true, server waits and returns solution (sync mode) + + // For SUBMIT_JOB requests + oneof job_data { + SolveLPRequest lp_request = 10; + SolveMIPRequest mip_request = 11; + } +} + +// Response for job submission +message SubmitResponse { + ResponseStatus status = 1; + string job_id = 2; // Unique job identifier + string message = 3; // Success/error message +} + +// Response for status check +message StatusResponse { + JobStatus job_status = 1; + string message = 2; + double progress = 3; // 0.0-1.0 (future enhancement) +} + +// Response for get 
result +message ResultResponse { + ResponseStatus status = 1; + string error_message = 2; + + oneof solution { + LPSolution lp_solution = 10; + MIPSolution mip_solution = 11; + } +} + +// Response for delete +message DeleteResponse { + ResponseStatus status = 1; + string message = 2; +} + +// Generic response wrapper +message AsyncResponse { + RequestType request_type = 1; + + oneof response_data { + SubmitResponse submit_response = 10; + StatusResponse status_response = 11; + ResultResponse result_response = 12; + DeleteResponse delete_response = 13; + } +} + +// Legacy synchronous response (for backward compatibility) +message SolveResponse { + ResponseStatus status = 1; + string error_message = 2; + + oneof solution { + LPSolution lp_solution = 10; + MIPSolution mip_solution = 11; + } +} + +enum ResponseStatus { + SUCCESS = 0; + ERROR_INVALID_REQUEST = 1; + ERROR_SOLVE_FAILED = 2; + ERROR_INTERNAL = 3; + ERROR_NOT_FOUND = 4; +} diff --git a/cpp/src/linear_programming/utilities/cython_solve.cu b/cpp/src/linear_programming/utilities/cython_solve.cu index 2753e824a..a6209def9 100644 --- a/cpp/src/linear_programming/utilities/cython_solve.cu +++ b/cpp/src/linear_programming/utilities/cython_solve.cu @@ -44,10 +44,9 @@ using cuopt::linear_programming::var_t; static cuopt::linear_programming::optimization_problem_t data_model_to_optimization_problem( cuopt::mps_parser::data_model_view_t* data_model, - cuopt::linear_programming::solver_settings_t* solver_settings, - raft::handle_t const* handle_ptr) + cuopt::linear_programming::solver_settings_t* solver_settings) { - cuopt::linear_programming::optimization_problem_t op_problem(handle_ptr); + cuopt::linear_programming::optimization_problem_t op_problem; op_problem.set_maximize(data_model->get_sense()); if (data_model->get_constraint_matrix_values().size() != 0 && data_model->get_constraint_matrix_indices().size() != 0 && @@ -95,7 +94,7 @@ data_model_to_optimization_problem( .last_restart_duality_gap_dual_solution_.data() != nullptr) { // Moved inside cuopt::linear_programming::pdlp_warm_start_data_t pdlp_warm_start_data( - solver_settings->get_pdlp_warm_start_data_view(), handle_ptr->get_stream()); + solver_settings->get_pdlp_warm_start_data_view()); solver_settings->get_pdlp_settings().set_pdlp_warm_start_data(pdlp_warm_start_data); } @@ -141,36 +140,41 @@ linear_programming_ret_t call_solve_lp( const bool use_pdlp_solver_mode = true; auto solution = cuopt::linear_programming::solve_lp( op_problem, solver_settings, problem_checking, use_pdlp_solver_mode, is_batch_mode); + + std::fprintf(stderr, "[call_solve_lp] Received solution from solve_lp\n"); + std::fflush(stderr); + + // Pass host vectors directly to Cython (no GPU dependency!) 
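// Illustrative client-side sketch of the async protocol defined in cuopt_remote.proto above:
// wrap a solve request in an AsyncRequest (SUBMIT_JOB), and poll it later with CHECK_STATUS.
// Transport and framing are elided; the helper names are hypothetical and the generated header
// name follows the usual protoc convention.
#include "cuopt_remote.pb.h"
#include <string>

inline std::string make_submit_request(const cuopt::remote::SolveLPRequest& lp_request)
{
  cuopt::remote::AsyncRequest req;
  req.set_request_type(cuopt::remote::SUBMIT_JOB);
  req.set_blocking(false);                 // false: poll with CHECK_STATUS instead of waiting
  *req.mutable_lp_request() = lp_request;  // fills the oneof job_data
  std::string wire;
  req.SerializeToString(&wire);
  return wire;
}

inline std::string make_status_request(const std::string& job_id)
{
  cuopt::remote::AsyncRequest req;
  req.set_request_type(cuopt::remote::CHECK_STATUS);
  req.set_job_id(job_id);
  std::string wire;
  req.SerializeToString(&wire);
  return wire;
}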
+ const auto& ws = solution.get_pdlp_warm_start_data(); + + std::fprintf(stderr, "[call_solve_lp] Got warm start data, passing host vectors directly\n"); + std::fprintf(stderr, + "[call_solve_lp] Warm start vector sizes: primal=%zu, dual=%zu\n", + ws.current_primal_solution_.size(), + ws.current_dual_solution_.size()); + std::fflush(stderr); + linear_programming_ret_t lp_ret{ - std::make_unique(solution.get_primal_solution().release()), - std::make_unique(solution.get_dual_solution().release()), - std::make_unique(solution.get_reduced_cost().release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_primal_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_dual_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().initial_primal_average_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().initial_dual_average_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_ATY_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().sum_primal_solutions_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().sum_dual_solutions_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.release()), - solution.get_pdlp_warm_start_data().initial_primal_weight_, - solution.get_pdlp_warm_start_data().initial_step_size_, - solution.get_pdlp_warm_start_data().total_pdlp_iterations_, - solution.get_pdlp_warm_start_data().total_pdhg_iterations_, - solution.get_pdlp_warm_start_data().last_candidate_kkt_score_, - solution.get_pdlp_warm_start_data().last_restart_kkt_score_, - solution.get_pdlp_warm_start_data().sum_solution_weight_, - solution.get_pdlp_warm_start_data().iterations_since_last_restart_, + solution.get_primal_solution(), + solution.get_dual_solution(), + solution.get_reduced_cost(), + ws.current_primal_solution_, + ws.current_dual_solution_, + ws.initial_primal_average_, + ws.initial_dual_average_, + ws.current_ATY_, + ws.sum_primal_solutions_, + ws.sum_dual_solutions_, + ws.last_restart_duality_gap_primal_solution_, + ws.last_restart_duality_gap_dual_solution_, + ws.initial_primal_weight_, + ws.initial_step_size_, + ws.total_pdlp_iterations_, + ws.total_pdhg_iterations_, + ws.last_candidate_kkt_score_, + ws.last_restart_kkt_score_, + ws.sum_solution_weight_, + ws.iterations_since_last_restart_, solution.get_termination_status(), solution.get_error_status().get_error_type(), solution.get_error_status().what(), @@ -183,6 +187,8 @@ linear_programming_ret_t call_solve_lp( solution.get_additional_termination_information().solve_time, solution.get_additional_termination_information().solved_by_pdlp}; + std::fprintf(stderr, "[call_solve_lp] Returning LP result with host data\n"); + std::fflush(stderr); return lp_ret; } @@ -204,7 +210,9 @@ mip_ret_t call_solve_mip( error_type_t::ValidationError, "MIP solve cannot be called on an LP problem!"); auto solution = cuopt::linear_programming::solve_mip(op_problem, solver_settings); - mip_ret_t mip_ret{std::make_unique(solution.get_solution().release()), + + // Pass host vector directly to Cython (no GPU dependency!) 
+ mip_ret_t mip_ret{solution.get_solution(), solution.get_termination_status(), solution.get_error_status().get_error_type(), solution.get_error_status().what(), @@ -233,18 +241,27 @@ std::unique_ptr call_solve( RAFT_CUDA_TRY(cudaStreamCreateWithFlags(&stream, flags)); const raft::handle_t handle_{stream}; - auto op_problem = data_model_to_optimization_problem(data_model, solver_settings, &handle_); + auto op_problem = data_model_to_optimization_problem(data_model, solver_settings); solver_ret_t response; if (op_problem.get_problem_category() == linear_programming::problem_category_t::LP) { + std::fprintf(stderr, "[call_solve] Calling call_solve_lp\n"); + std::fflush(stderr); response.lp_ret = call_solve_lp(op_problem, solver_settings->get_pdlp_settings(), is_batch_mode); + std::fprintf(stderr, "[call_solve] call_solve_lp returned\n"); + std::fflush(stderr); response.problem_type = linear_programming::problem_category_t::LP; } else { response.mip_ret = call_solve_mip(op_problem, solver_settings->get_mip_settings()); response.problem_type = linear_programming::problem_category_t::MIP; } - return std::make_unique(std::move(response)); + std::fprintf(stderr, "[call_solve] Creating unique_ptr and returning\n"); + std::fflush(stderr); + auto result = std::make_unique(std::move(response)); + std::fprintf(stderr, "[call_solve] Returning result to Python\n"); + std::fflush(stderr); + return result; } static int compute_max_thread( diff --git a/cpp/src/linear_programming/utilities/problem_checking.cu b/cpp/src/linear_programming/utilities/problem_checking.cu index fbbdd1ad9..57fceaec4 100644 --- a/cpp/src/linear_programming/utilities/problem_checking.cu +++ b/cpp/src/linear_programming/utilities/problem_checking.cu @@ -32,7 +32,7 @@ namespace cuopt::linear_programming { template void problem_checking_t::check_csr_representation( - const optimization_problem_t& op_problem) + const gpu_optimization_problem_t& op_problem) { cuopt_expects(op_problem.get_constraint_matrix_indices().size() == op_problem.get_constraint_matrix_values().size(), @@ -40,10 +40,11 @@ void problem_checking_t::check_csr_representation( "A_index and A_values must have same sizes."); // Check offset values - const i_t first_value = op_problem.get_constraint_matrix_offsets().front_element( - op_problem.get_handle_ptr()->get_stream()); - cuopt_expects( - first_value == 0, error_type_t::ValidationError, "A_offsets first value should be 0."); + const auto& offsets = op_problem.get_constraint_matrix_offsets(); + const i_t first_value = offsets.front_element(op_problem.get_handle_ptr()->get_stream()); + cuopt_expects(!offsets.is_empty() && first_value == 0, + error_type_t::ValidationError, + "A_offsets first value should be 0."); cuopt_expects(thrust::is_sorted(op_problem.get_handle_ptr()->get_thrust_policy(), op_problem.get_constraint_matrix_offsets().cbegin(), @@ -64,7 +65,7 @@ void problem_checking_t::check_csr_representation( template void problem_checking_t::check_initial_primal_representation( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, const rmm::device_uvector& primal_initial_solution) { // Inital solution check if set @@ -93,7 +94,7 @@ void problem_checking_t::check_initial_primal_representation( template void problem_checking_t::check_initial_dual_representation( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, const rmm::device_uvector& dual_initial_solution) { if (!dual_initial_solution.is_empty()) { @@ -112,7 +113,7 @@ void 
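// Hypothetical host-side analogue of the CSR offset checks above, shown as a stand-alone
// helper for illustration; the row-count and nnz checks are additions for this sketch, not
// taken from the patch.
#include <cstddef>
#include <stdexcept>
#include <vector>

inline void check_csr_offsets(const std::vector<int>& offsets, int n_rows, std::size_t nnz)
{
  if (offsets.empty() || offsets.front() != 0) {
    throw std::invalid_argument("A_offsets first value should be 0.");
  }
  if (offsets.size() != static_cast<std::size_t>(n_rows) + 1) {
    throw std::invalid_argument("A_offsets must have n_rows + 1 entries.");
  }
  for (std::size_t i = 1; i < offsets.size(); ++i) {
    if (offsets[i] < offsets[i - 1]) {
      throw std::invalid_argument("A_offsets must be non-decreasing.");
    }
  }
  if (static_cast<std::size_t>(offsets.back()) != nnz) {
    throw std::invalid_argument("A_offsets last value should equal nnz.");
  }
}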
problem_checking_t::check_initial_dual_representation( template void problem_checking_t::check_initial_solution_representation( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, const pdlp_solver_settings_t& settings) { if (settings.initial_primal_solution_.get() != nullptr) { @@ -125,7 +126,7 @@ void problem_checking_t::check_initial_solution_representation( template void problem_checking_t::check_initial_solution_representation( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, const mip_solver_settings_t& settings) { for (const auto& initial_solution : settings.initial_solutions) { @@ -135,7 +136,7 @@ void problem_checking_t::check_initial_solution_representation( template void problem_checking_t::check_problem_representation( - const optimization_problem_t& op_problem) + const gpu_optimization_problem_t& op_problem) { bool empty_problem = op_problem.get_constraint_matrix_values().is_empty(); @@ -324,7 +325,7 @@ void problem_checking_t::check_unscaled_solution( template bool problem_checking_t::has_crossing_bounds( - const optimization_problem_t& op_problem) + const gpu_optimization_problem_t& op_problem) { // Check if all variable bounds are valid (upper >= lower) bool all_variable_bounds_valid = thrust::all_of( diff --git a/cpp/src/linear_programming/utilities/problem_checking.cuh b/cpp/src/linear_programming/utilities/problem_checking.cuh index aeeb5a115..8e4b7444c 100644 --- a/cpp/src/linear_programming/utilities/problem_checking.cuh +++ b/cpp/src/linear_programming/utilities/problem_checking.cuh @@ -16,6 +16,7 @@ */ #pragma once +#include #include #include #include @@ -35,26 +36,26 @@ class problem_t; template class problem_checking_t { public: - static void check_csr_representation(const optimization_problem_t& op_problem); + static void check_csr_representation(const gpu_optimization_problem_t& op_problem); // Check all fields and convert row_types to constraints lower/upper bounds if needed - static void check_problem_representation(const optimization_problem_t& op_problem); - static bool has_crossing_bounds(const optimization_problem_t& op_problem); + static void check_problem_representation(const gpu_optimization_problem_t& op_problem); + static bool has_crossing_bounds(const gpu_optimization_problem_t& op_problem); static void check_scaled_problem(detail::problem_t const& scaled_problem, detail::problem_t const& op_problem); static void check_unscaled_solution(detail::problem_t& op_problem, rmm::device_uvector const& assignment); static void check_initial_primal_representation( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, const rmm::device_uvector& primal_initial_solution); static void check_initial_dual_representation( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, const rmm::device_uvector& dual_initial_solution); static void check_initial_solution_representation( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, const pdlp_solver_settings_t& settings); static void check_initial_solution_representation( - const optimization_problem_t& op_problem, + const gpu_optimization_problem_t& op_problem, const mip_solver_settings_t& settings); }; diff --git a/cpp/src/linear_programming/utilities/remote_solve.cu b/cpp/src/linear_programming/utilities/remote_solve.cu new file mode 100644 index 000000000..5c0b931a1 --- /dev/null +++ 
b/cpp/src/linear_programming/utilities/remote_solve.cu @@ -0,0 +1,925 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include + +namespace cuopt::linear_programming { + +// Helper to write data to socket +static void write_all(int sockfd, const void* data, size_t size) +{ + const uint8_t* ptr = static_cast(data); + size_t remaining = size; + + while (remaining > 0) { + ssize_t written = ::write(sockfd, ptr, remaining); + if (written <= 0) { throw std::runtime_error("Socket write failed"); } + ptr += written; + remaining -= written; + } +} + +// Helper to read data from socket +static void read_all(int sockfd, void* data, size_t size) +{ + uint8_t* ptr = static_cast(data); + size_t remaining = size; + + while (remaining > 0) { + ssize_t nread = ::read(sockfd, ptr, remaining); + if (nread <= 0) { throw std::runtime_error("Socket read failed"); } + ptr += nread; + remaining -= nread; + } +} + +// Convert optimization_problem_t to protobuf message +template +static void problem_to_protobuf(const optimization_problem_t& problem, + cuopt::remote::OptimizationProblem* pb_problem) +{ + // Problem metadata + pb_problem->set_maximize(problem.get_sense()); + pb_problem->set_objective_scaling_factor(problem.get_objective_scaling_factor()); + pb_problem->set_objective_offset(problem.get_objective_offset()); + + // Constraint matrix (CSR format) + const auto& matrix_values = problem.get_constraint_matrix_values(); + const auto& matrix_indices = problem.get_constraint_matrix_indices(); + const auto& matrix_offsets = problem.get_constraint_matrix_offsets(); + + for (const auto& val : matrix_values) { + pb_problem->add_constraint_matrix_values(static_cast(val)); + } + for (const auto& idx : matrix_indices) { + pb_problem->add_constraint_matrix_indices(static_cast(idx)); + } + for (const auto& offset : matrix_offsets) { + pb_problem->add_constraint_matrix_offsets(static_cast(offset)); + } + + // Problem vectors + const auto& obj_coeffs = problem.get_objective_coefficients(); + const auto& constraint_bounds = problem.get_constraint_bounds(); + const auto& var_lower = problem.get_variable_lower_bounds(); + const auto& var_upper = problem.get_variable_upper_bounds(); + + for (const auto& val : obj_coeffs) { + pb_problem->add_objective_coefficients(static_cast(val)); + } + for (const auto& val : constraint_bounds) { + pb_problem->add_constraint_bounds(static_cast(val)); + } + for (const auto& val : var_lower) { + pb_problem->add_variable_lower_bounds(static_cast(val)); + } + for (const auto& val : var_upper) { + pb_problem->add_variable_upper_bounds(static_cast(val)); + } + + // Constraint lower/upper bounds (additional representation) + const auto& constraint_lower = problem.get_constraint_lower_bounds(); + const auto& constraint_upper = 
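// Client-side sketch built on the write_all()/read_all() helpers defined above. The 4-byte
// length prefix used here is an assumption made for this illustration and may differ from the
// framing the implementation actually uses; the helper names are hypothetical.
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>
#include <cstdint>
#include <stdexcept>
#include <string>

inline int connect_to_server(const std::string& host_ip, uint16_t port)
{
  int sockfd = ::socket(AF_INET, SOCK_STREAM, 0);
  if (sockfd < 0) { throw std::runtime_error("socket() failed"); }
  sockaddr_in addr{};
  addr.sin_family = AF_INET;
  addr.sin_port   = htons(port);
  if (::inet_pton(AF_INET, host_ip.c_str(), &addr.sin_addr) != 1 ||
      ::connect(sockfd, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) != 0) {
    ::close(sockfd);
    throw std::runtime_error("Failed to connect to remote cuOpt server");
  }
  return sockfd;
}

inline void send_framed_message(int sockfd, const std::string& payload)
{
  const uint32_t size = htonl(static_cast<uint32_t>(payload.size()));
  write_all(sockfd, &size, sizeof(size));             // length prefix first
  write_all(sockfd, payload.data(), payload.size());  // then the serialized protobuf bytes
}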
problem.get_constraint_upper_bounds(); + for (const auto& val : constraint_lower) { + pb_problem->add_constraint_lower_bounds(static_cast(val)); + } + for (const auto& val : constraint_upper) { + pb_problem->add_constraint_upper_bounds(static_cast(val)); + } + + // Row types (constraint types: '<', '>', '=') + const auto& row_types = problem.get_row_types(); + if (!row_types.empty()) { pb_problem->set_row_types(row_types.data(), row_types.size()); } + + // Variable types (for MIP: CONTINUOUS or INTEGER) + const auto& var_types = problem.get_variable_types(); + if (!var_types.empty()) { + for (const auto& vt : var_types) { + bool is_int = (vt == cuopt::linear_programming::var_t::INTEGER); + pb_problem->add_is_integer(is_int); + pb_problem->add_is_binary(false); // cuOpt uses INTEGER for both + } + } +} + +// Convert protobuf message to optimization_problem_t +template +static optimization_problem_t protobuf_to_problem( + const cuopt::remote::OptimizationProblem& pb_problem) +{ + optimization_problem_t problem; + + // Set problem sense + problem.set_maximize(pb_problem.maximize()); + problem.set_objective_scaling_factor(static_cast(pb_problem.objective_scaling_factor())); + problem.set_objective_offset(static_cast(pb_problem.objective_offset())); + + // Convert constraint matrix + std::vector matrix_values; + std::vector matrix_indices; + std::vector matrix_offsets; + + matrix_values.reserve(pb_problem.constraint_matrix_values_size()); + for (int i = 0; i < pb_problem.constraint_matrix_values_size(); ++i) { + matrix_values.push_back(static_cast(pb_problem.constraint_matrix_values(i))); + } + + matrix_indices.reserve(pb_problem.constraint_matrix_indices_size()); + for (int i = 0; i < pb_problem.constraint_matrix_indices_size(); ++i) { + matrix_indices.push_back(static_cast(pb_problem.constraint_matrix_indices(i))); + } + + matrix_offsets.reserve(pb_problem.constraint_matrix_offsets_size()); + for (int i = 0; i < pb_problem.constraint_matrix_offsets_size(); ++i) { + matrix_offsets.push_back(static_cast(pb_problem.constraint_matrix_offsets(i))); + } + + problem.set_csr_constraint_matrix(matrix_values.data(), + matrix_values.size(), + matrix_indices.data(), + matrix_indices.size(), + matrix_offsets.data(), + matrix_offsets.size()); + + // Convert problem vectors + std::vector obj_coeffs; + std::vector constraint_bounds; + std::vector var_lower; + std::vector var_upper; + + obj_coeffs.reserve(pb_problem.objective_coefficients_size()); + for (int i = 0; i < pb_problem.objective_coefficients_size(); ++i) { + obj_coeffs.push_back(static_cast(pb_problem.objective_coefficients(i))); + } + + constraint_bounds.reserve(pb_problem.constraint_bounds_size()); + for (int i = 0; i < pb_problem.constraint_bounds_size(); ++i) { + constraint_bounds.push_back(static_cast(pb_problem.constraint_bounds(i))); + } + + var_lower.reserve(pb_problem.variable_lower_bounds_size()); + for (int i = 0; i < pb_problem.variable_lower_bounds_size(); ++i) { + var_lower.push_back(static_cast(pb_problem.variable_lower_bounds(i))); + } + + var_upper.reserve(pb_problem.variable_upper_bounds_size()); + for (int i = 0; i < pb_problem.variable_upper_bounds_size(); ++i) { + var_upper.push_back(static_cast(pb_problem.variable_upper_bounds(i))); + } + + problem.set_objective_coefficients(obj_coeffs.data(), obj_coeffs.size()); + problem.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + problem.set_variable_lower_bounds(var_lower.data(), var_lower.size()); + problem.set_variable_upper_bounds(var_upper.data(), 
var_upper.size()); + + // Constraint lower/upper bounds (if provided) + if (pb_problem.constraint_lower_bounds_size() > 0) { + std::vector constraint_lower; + constraint_lower.reserve(pb_problem.constraint_lower_bounds_size()); + for (int i = 0; i < pb_problem.constraint_lower_bounds_size(); ++i) { + constraint_lower.push_back(static_cast(pb_problem.constraint_lower_bounds(i))); + } + problem.set_constraint_lower_bounds(constraint_lower.data(), constraint_lower.size()); + } + + if (pb_problem.constraint_upper_bounds_size() > 0) { + std::vector constraint_upper; + constraint_upper.reserve(pb_problem.constraint_upper_bounds_size()); + for (int i = 0; i < pb_problem.constraint_upper_bounds_size(); ++i) { + constraint_upper.push_back(static_cast(pb_problem.constraint_upper_bounds(i))); + } + problem.set_constraint_upper_bounds(constraint_upper.data(), constraint_upper.size()); + } + + // Row types (if provided) + if (!pb_problem.row_types().empty()) { + const std::string& rt = pb_problem.row_types(); + problem.set_row_types(rt.data(), rt.size()); + } + + return problem; +} + +// Convert PDLP warm start data to protobuf +template +static void warm_start_to_protobuf(const pdlp_warm_start_data_t& ws, + cuopt::remote::PDLPWarmStartData* pb_ws) +{ + // Convert vectors + for (const auto& val : ws.current_primal_solution_) { + pb_ws->add_current_primal_solution(static_cast(val)); + } + for (const auto& val : ws.current_dual_solution_) { + pb_ws->add_current_dual_solution(static_cast(val)); + } + for (const auto& val : ws.initial_primal_average_) { + pb_ws->add_initial_primal_average(static_cast(val)); + } + for (const auto& val : ws.initial_dual_average_) { + pb_ws->add_initial_dual_average(static_cast(val)); + } + for (const auto& val : ws.current_ATY_) { + pb_ws->add_current_aty(static_cast(val)); + } + for (const auto& val : ws.sum_primal_solutions_) { + pb_ws->add_sum_primal_solutions(static_cast(val)); + } + for (const auto& val : ws.sum_dual_solutions_) { + pb_ws->add_sum_dual_solutions(static_cast(val)); + } + for (const auto& val : ws.last_restart_duality_gap_primal_solution_) { + pb_ws->add_last_restart_duality_gap_primal_solution(static_cast(val)); + } + for (const auto& val : ws.last_restart_duality_gap_dual_solution_) { + pb_ws->add_last_restart_duality_gap_dual_solution(static_cast(val)); + } + + // Convert scalars + pb_ws->set_initial_primal_weight(static_cast(ws.initial_primal_weight_)); + pb_ws->set_initial_step_size(static_cast(ws.initial_step_size_)); + pb_ws->set_total_pdlp_iterations(ws.total_pdlp_iterations_); + pb_ws->set_total_pdhg_iterations(ws.total_pdhg_iterations_); + pb_ws->set_last_candidate_kkt_score(static_cast(ws.last_candidate_kkt_score_)); + pb_ws->set_last_restart_kkt_score(static_cast(ws.last_restart_kkt_score_)); + pb_ws->set_sum_solution_weight(static_cast(ws.sum_solution_weight_)); + pb_ws->set_iterations_since_last_restart(ws.iterations_since_last_restart_); +} + +// Convert protobuf to PDLP warm start data +template +static pdlp_warm_start_data_t protobuf_to_warm_start( + const cuopt::remote::PDLPWarmStartData& pb_ws) +{ + pdlp_warm_start_data_t ws; + + // Convert vectors + ws.current_primal_solution_.reserve(pb_ws.current_primal_solution_size()); + for (int i = 0; i < pb_ws.current_primal_solution_size(); ++i) { + ws.current_primal_solution_.push_back(static_cast(pb_ws.current_primal_solution(i))); + } + + ws.current_dual_solution_.reserve(pb_ws.current_dual_solution_size()); + for (int i = 0; i < pb_ws.current_dual_solution_size(); ++i) { + 
ws.current_dual_solution_.push_back(static_cast(pb_ws.current_dual_solution(i))); + } + + ws.initial_primal_average_.reserve(pb_ws.initial_primal_average_size()); + for (int i = 0; i < pb_ws.initial_primal_average_size(); ++i) { + ws.initial_primal_average_.push_back(static_cast(pb_ws.initial_primal_average(i))); + } + + ws.initial_dual_average_.reserve(pb_ws.initial_dual_average_size()); + for (int i = 0; i < pb_ws.initial_dual_average_size(); ++i) { + ws.initial_dual_average_.push_back(static_cast(pb_ws.initial_dual_average(i))); + } + + ws.current_ATY_.reserve(pb_ws.current_aty_size()); + for (int i = 0; i < pb_ws.current_aty_size(); ++i) { + ws.current_ATY_.push_back(static_cast(pb_ws.current_aty(i))); + } + + ws.sum_primal_solutions_.reserve(pb_ws.sum_primal_solutions_size()); + for (int i = 0; i < pb_ws.sum_primal_solutions_size(); ++i) { + ws.sum_primal_solutions_.push_back(static_cast(pb_ws.sum_primal_solutions(i))); + } + + ws.sum_dual_solutions_.reserve(pb_ws.sum_dual_solutions_size()); + for (int i = 0; i < pb_ws.sum_dual_solutions_size(); ++i) { + ws.sum_dual_solutions_.push_back(static_cast(pb_ws.sum_dual_solutions(i))); + } + + ws.last_restart_duality_gap_primal_solution_.reserve( + pb_ws.last_restart_duality_gap_primal_solution_size()); + for (int i = 0; i < pb_ws.last_restart_duality_gap_primal_solution_size(); ++i) { + ws.last_restart_duality_gap_primal_solution_.push_back( + static_cast(pb_ws.last_restart_duality_gap_primal_solution(i))); + } + + ws.last_restart_duality_gap_dual_solution_.reserve( + pb_ws.last_restart_duality_gap_dual_solution_size()); + for (int i = 0; i < pb_ws.last_restart_duality_gap_dual_solution_size(); ++i) { + ws.last_restart_duality_gap_dual_solution_.push_back( + static_cast(pb_ws.last_restart_duality_gap_dual_solution(i))); + } + + // Convert scalars + ws.initial_primal_weight_ = static_cast(pb_ws.initial_primal_weight()); + ws.initial_step_size_ = static_cast(pb_ws.initial_step_size()); + ws.total_pdlp_iterations_ = pb_ws.total_pdlp_iterations(); + ws.total_pdhg_iterations_ = pb_ws.total_pdhg_iterations(); + ws.last_candidate_kkt_score_ = static_cast(pb_ws.last_candidate_kkt_score()); + ws.last_restart_kkt_score_ = static_cast(pb_ws.last_restart_kkt_score()); + ws.sum_solution_weight_ = static_cast(pb_ws.sum_solution_weight()); + ws.iterations_since_last_restart_ = pb_ws.iterations_since_last_restart(); + + return ws; +} + +// Convert LP solution to protobuf +template +static void lp_solution_to_protobuf(optimization_problem_solution_t& solution, + cuopt::remote::LPSolution* pb_solution) +{ + // Solution vectors + for (const auto& val : solution.get_primal_solution()) { + pb_solution->add_primal_solution(static_cast(val)); + } + for (const auto& val : solution.get_dual_solution()) { + pb_solution->add_dual_solution(static_cast(val)); + } + for (const auto& val : solution.get_reduced_cost()) { + pb_solution->add_reduced_cost(static_cast(val)); + } + + // Warm start data + const auto& ws = solution.get_pdlp_warm_start_data(); + warm_start_to_protobuf(ws, pb_solution->mutable_warm_start_data()); + + // Termination status + pb_solution->set_termination_status( + static_cast(solution.get_termination_status())); + + // Solution statistics + const auto& stats = solution.get_additional_termination_information(); + pb_solution->set_l2_primal_residual(stats.l2_primal_residual); + pb_solution->set_l2_dual_residual(stats.l2_dual_residual); + pb_solution->set_primal_objective(stats.primal_objective); + pb_solution->set_dual_objective(stats.dual_objective); 
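+  // Remaining scalar statistics copied below: duality gap, iteration count, solve time,
+  // and whether the result was produced by PDLP (solved_by_pdlp).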
+ pb_solution->set_gap(stats.gap); + pb_solution->set_nb_iterations(stats.number_of_steps_taken); + pb_solution->set_solve_time(stats.solve_time); + pb_solution->set_solved_by_pdlp(stats.solved_by_pdlp); +} + +// Convert protobuf to LP solution +template +static optimization_problem_solution_t protobuf_to_lp_solution( + const cuopt::remote::LPSolution& pb_solution) +{ + // Convert solution vectors + std::vector primal_solution; + std::vector dual_solution; + std::vector reduced_cost; + + primal_solution.reserve(pb_solution.primal_solution_size()); + for (int i = 0; i < pb_solution.primal_solution_size(); ++i) { + primal_solution.push_back(static_cast(pb_solution.primal_solution(i))); + } + + dual_solution.reserve(pb_solution.dual_solution_size()); + for (int i = 0; i < pb_solution.dual_solution_size(); ++i) { + dual_solution.push_back(static_cast(pb_solution.dual_solution(i))); + } + + reduced_cost.reserve(pb_solution.reduced_cost_size()); + for (int i = 0; i < pb_solution.reduced_cost_size(); ++i) { + reduced_cost.push_back(static_cast(pb_solution.reduced_cost(i))); + } + + // Convert warm start data + pdlp_warm_start_data_t warm_start_data; + if (pb_solution.has_warm_start_data()) { + warm_start_data = protobuf_to_warm_start(pb_solution.warm_start_data()); + } + + // Convert solution statistics + typename optimization_problem_solution_t::additional_termination_information_t stats{}; + stats.l2_primal_residual = pb_solution.l2_primal_residual(); + stats.l2_dual_residual = pb_solution.l2_dual_residual(); + stats.primal_objective = pb_solution.primal_objective(); + stats.dual_objective = pb_solution.dual_objective(); + stats.gap = pb_solution.gap(); + stats.number_of_steps_taken = pb_solution.nb_iterations(); + stats.solve_time = pb_solution.solve_time(); + stats.solved_by_pdlp = pb_solution.solved_by_pdlp(); + + // Create solution + return optimization_problem_solution_t( + std::move(primal_solution), + std::move(dual_solution), + std::move(reduced_cost), + std::move(warm_start_data), + "", // objective_name + std::vector(), // var_names + std::vector(), // row_names + stats, + static_cast(pb_solution.termination_status())); +} + +// Convert protobuf to MIP solution +template +static mip_solution_t protobuf_to_mip_solution( + const cuopt::remote::MIPSolution& pb_solution) +{ + // Convert solution vector + std::vector solution; + solution.reserve(pb_solution.solution_size()); + for (int i = 0; i < pb_solution.solution_size(); ++i) { + solution.push_back(static_cast(pb_solution.solution(i))); + } + + // Convert solver stats + solver_stats_t stats; + stats.total_solve_time = static_cast(pb_solution.total_solve_time()); + stats.presolve_time = static_cast(pb_solution.presolve_time()); + stats.solution_bound = static_cast(pb_solution.solution_bound()); + stats.num_nodes = static_cast(pb_solution.nodes()); + stats.num_simplex_iterations = static_cast(pb_solution.simplex_iterations()); + + // Create solution + auto mip_sol = mip_solution_t( + std::move(solution), + std::vector(), // var_names + static_cast(pb_solution.objective()), + static_cast(pb_solution.mip_gap()), + static_cast(pb_solution.termination_status()), + static_cast(pb_solution.max_constraint_violation()), + static_cast(pb_solution.max_int_violation()), + static_cast(pb_solution.max_variable_bound_violation()), + stats); + + // Print solution stats using shared method + mip_sol.print_solution_stats(); + + return mip_sol; +} + +// Check if remote solve is enabled via environment variables +bool is_remote_solve_enabled(const char** 
host, const char** port)
+{
+  *host = std::getenv("CUOPT_REMOTE_HOST");
+  *port = std::getenv("CUOPT_REMOTE_PORT");
+  return (*host != nullptr && *port != nullptr);
+}
+
+// Check if sync mode is enabled (default is async)
+static bool use_sync_mode()
+{
+  const char* sync_env = std::getenv("CUOPT_REMOTE_USE_SYNC");
+  return (sync_env != nullptr && std::string(sync_env) == "1");
+}
+
+// Helper: Create and connect socket
+static int connect_to_server(const char* host, const char* port)
+{
+  int sockfd = socket(AF_INET, SOCK_STREAM, 0);
+  if (sockfd < 0) { throw std::runtime_error("Failed to create socket"); }
+
+  struct hostent* server = gethostbyname(host);
+  if (server == nullptr) {
+    close(sockfd);
+    throw std::runtime_error("Failed to resolve hostname");
+  }
+
+  struct sockaddr_in serv_addr;
+  std::memset(&serv_addr, 0, sizeof(serv_addr));
+  serv_addr.sin_family = AF_INET;
+  std::memcpy(&serv_addr.sin_addr.s_addr, server->h_addr, server->h_length);
+  serv_addr.sin_port = htons(std::atoi(port));
+
+  if (connect(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0) {
+    close(sockfd);
+    throw std::runtime_error("Failed to connect to remote server");
+  }
+
+  return sockfd;
+}
+
+// Helper: Submit a job to the async server
+static std::string submit_job(const char* host,
+                              const char* port,
+                              const cuopt::remote::AsyncRequest& request)
+{
+  int sockfd = connect_to_server(host, port);
+
+  try {
+    // Serialize and send request
+    std::string request_data = request.SerializeAsString();
+    uint32_t request_size    = static_cast<uint32_t>(request_data.size());
+
+    write_all(sockfd, &request_size, sizeof(request_size));
+    write_all(sockfd, request_data.data(), request_data.size());
+
+    // Read response
+    uint32_t response_size;
+    read_all(sockfd, &response_size, sizeof(response_size));
+
+    std::vector<uint8_t> response_data(response_size);
+    read_all(sockfd, response_data.data(), response_size);
+
+    close(sockfd);
+
+    // Parse response
+    cuopt::remote::AsyncResponse async_response;
+    if (!async_response.ParseFromArray(response_data.data(), response_size)) {
+      throw std::runtime_error("Failed to parse AsyncResponse");
+    }
+
+    if (!async_response.has_submit_response()) {
+      throw std::runtime_error("AsyncResponse does not contain submit_response");
+    }
+
+    const auto& submit_resp = async_response.submit_response();
+    if (submit_resp.status() != cuopt::remote::SUCCESS) {
+      throw std::runtime_error("Job submission failed: " + submit_resp.message());
+    }
+
+    return submit_resp.job_id();
+  } catch (...)
{ + close(sockfd); + throw; + } +} + +// Helper: Poll job status until complete +static void poll_until_complete(const char* host, const char* port, const std::string& job_id) +{ + while (true) { + int sockfd = connect_to_server(host, port); + + try { + // Create status request + cuopt::remote::AsyncRequest status_request; + status_request.set_request_type(cuopt::remote::CHECK_STATUS); + status_request.set_job_id(job_id); + + // Send request + std::string request_data = status_request.SerializeAsString(); + uint32_t request_size = static_cast(request_data.size()); + + write_all(sockfd, &request_size, sizeof(request_size)); + write_all(sockfd, request_data.data(), request_data.size()); + + // Read response + uint32_t response_size; + read_all(sockfd, &response_size, sizeof(response_size)); + + std::vector response_data(response_size); + read_all(sockfd, response_data.data(), response_size); + + close(sockfd); + + // Parse response + cuopt::remote::AsyncResponse async_response; + if (!async_response.ParseFromArray(response_data.data(), response_size)) { + throw std::runtime_error("Failed to parse status response"); + } + + if (!async_response.has_status_response()) { + throw std::runtime_error("AsyncResponse does not contain status_response"); + } + + const auto& status_resp = async_response.status_response(); + + if (status_resp.job_status() == cuopt::remote::COMPLETED) { + return; // Job is done + } else if (status_resp.job_status() == cuopt::remote::FAILED) { + throw std::runtime_error("Job failed: " + status_resp.message()); + } + + // Job still pending/running, wait a bit before polling again + usleep(100000); // 100ms + } catch (...) { + close(sockfd); + throw; + } + } +} + +// Helper: Get result from completed job +static cuopt::remote::ResultResponse get_result(const char* host, + const char* port, + const std::string& job_id) +{ + int sockfd = connect_to_server(host, port); + + try { + // Create get result request + cuopt::remote::AsyncRequest result_request; + result_request.set_request_type(cuopt::remote::GET_RESULT); + result_request.set_job_id(job_id); + + // Send request + std::string request_data = result_request.SerializeAsString(); + uint32_t request_size = static_cast(request_data.size()); + + write_all(sockfd, &request_size, sizeof(request_size)); + write_all(sockfd, request_data.data(), request_data.size()); + + // Read response + uint32_t response_size; + read_all(sockfd, &response_size, sizeof(response_size)); + + std::vector response_data(response_size); + read_all(sockfd, response_data.data(), response_size); + + close(sockfd); + + // Parse response + cuopt::remote::AsyncResponse async_response; + if (!async_response.ParseFromArray(response_data.data(), response_size)) { + throw std::runtime_error("Failed to parse result response"); + } + + if (!async_response.has_result_response()) { + throw std::runtime_error("AsyncResponse does not contain result_response"); + } + + return async_response.result_response(); + } catch (...) 
{ + close(sockfd); + throw; + } +} + +// Helper: Delete job after retrieving result +static void delete_job(const char* host, const char* port, const std::string& job_id) +{ + int sockfd = connect_to_server(host, port); + + try { + // Create delete request + cuopt::remote::AsyncRequest delete_request; + delete_request.set_request_type(cuopt::remote::DELETE_RESULT); + delete_request.set_job_id(job_id); + + // Send request + std::string request_data = delete_request.SerializeAsString(); + uint32_t request_size = static_cast(request_data.size()); + + write_all(sockfd, &request_size, sizeof(request_size)); + write_all(sockfd, request_data.data(), request_data.size()); + + // Read response (but don't need to check it) + uint32_t response_size; + read_all(sockfd, &response_size, sizeof(response_size)); + + std::vector response_data(response_size); + read_all(sockfd, response_data.data(), response_size); + + close(sockfd); + } catch (...) { + close(sockfd); + throw; + } +} + +// Helper: Submit job in sync mode (blocking) and get result directly +static cuopt::remote::ResultResponse submit_job_sync(const char* host, + const char* port, + const cuopt::remote::AsyncRequest& request) +{ + int sockfd = connect_to_server(host, port); + + try { + // Serialize and send request + std::string request_data = request.SerializeAsString(); + uint32_t request_size = static_cast(request_data.size()); + + write_all(sockfd, &request_size, sizeof(request_size)); + write_all(sockfd, request_data.data(), request_data.size()); + + // Read response + uint32_t response_size; + read_all(sockfd, &response_size, sizeof(response_size)); + + std::vector response_data(response_size); + read_all(sockfd, response_data.data(), response_size); + + close(sockfd); + + // Parse response + cuopt::remote::AsyncResponse async_response; + if (!async_response.ParseFromArray(response_data.data(), response_size)) { + throw std::runtime_error("Failed to parse AsyncResponse"); + } + + if (!async_response.has_result_response()) { + throw std::runtime_error("AsyncResponse does not contain result_response (sync mode)"); + } + + return async_response.result_response(); + } catch (...) { + close(sockfd); + throw; + } +} + +// Solve LP problem remotely using async server (sync or async mode) +template +optimization_problem_solution_t solve_lp_remote( + const optimization_problem_t& problem, const pdlp_solver_settings_t& settings) +{ + // Get remote host and port + const char* host; + const char* port; + if (!is_remote_solve_enabled(&host, &port)) { + throw std::runtime_error("Remote solve not enabled (CUOPT_REMOTE_HOST/PORT not set)"); + } + + // Check if sync or async mode + const bool sync_mode = use_sync_mode(); + fprintf(stderr, + "[solve_lp_remote] Connecting to %s:%s (async server, %s mode)\n", + host, + port, + sync_mode ? "sync" : "async"); + + try { + // Create AsyncRequest + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::SUBMIT_JOB); + request.set_blocking(sync_mode); + + // Add LP problem data + auto* lp_request = request.mutable_lp_request(); + + // Set header + auto* header = lp_request->mutable_header(); + header->set_version(1); + header->set_problem_type(cuopt::remote::LP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? 
cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE);
+
+    // Convert problem to protobuf
+    problem_to_protobuf(problem, lp_request->mutable_problem());
+
+    cuopt::remote::ResultResponse result_response;
+
+    if (sync_mode) {
+      // ===== SYNC MODE: Submit with blocking=true, get result directly =====
+      fprintf(stderr, "[solve_lp_remote] Sending blocking request, waiting for solution...\n");
+      result_response = submit_job_sync(host, port, request);
+      fprintf(stderr, "[solve_lp_remote] Solution received from sync request\n");
+
+    } else {
+      // ===== ASYNC MODE: Submit → Poll → Get Result → Delete =====
+      std::string job_id = submit_job(host, port, request);
+      fprintf(stderr, "[solve_lp_remote] Job submitted, ID: %s\n", job_id.c_str());
+
+      // Poll until job completes
+      fprintf(stderr, "[solve_lp_remote] Polling for completion...\n");
+      poll_until_complete(host, port, job_id);
+
+      // Get result
+      fprintf(stderr, "[solve_lp_remote] Job complete, retrieving result...\n");
+      result_response = get_result(host, port, job_id);
+
+      // Delete job
+      delete_job(host, port, job_id);
+      fprintf(stderr, "[solve_lp_remote] Job deleted from server\n");
+    }
+
+    // Check result status
+    if (result_response.status() != cuopt::remote::SUCCESS) {
+      throw std::runtime_error("Remote solve failed: " + result_response.error_message());
+    }
+
+    if (!result_response.has_lp_solution()) {
+      throw std::runtime_error("ResultResponse does not contain LP solution");
+    }
+
+    fprintf(stderr, "[solve_lp_remote] Solution received successfully\n");
+
+    // Convert protobuf solution to C++ solution
+    auto lp_sol = protobuf_to_lp_solution<i_t, f_t>(result_response.lp_solution());
+
+    // Print solution stats using shared method
+    lp_sol.print_solution_stats();
+
+    return lp_sol;
+
+  } catch (...) {
+    throw;
+  }
+}
+
+// Solve MIP problem remotely using async server (sync or async mode)
+template <typename i_t, typename f_t>
+mip_solution_t<i_t, f_t> solve_mip_remote(const optimization_problem_t<i_t, f_t>& problem,
+                                          const mip_solver_settings_t<i_t, f_t>& settings)
+{
+  // Get remote host and port
+  const char* host;
+  const char* port;
+  if (!is_remote_solve_enabled(&host, &port)) {
+    throw std::runtime_error("Remote solve not enabled (CUOPT_REMOTE_HOST/PORT not set)");
+  }
+
+  // Check if sync or async mode
+  const bool sync_mode = use_sync_mode();
+  fprintf(stderr,
+          "[solve_mip_remote] Connecting to %s:%s (async server, %s mode)\n",
+          host,
+          port,
+          sync_mode ? "sync" : "async");
+
+  try {
+    // Create AsyncRequest
+    cuopt::remote::AsyncRequest request;
+    request.set_request_type(cuopt::remote::SUBMIT_JOB);
+    request.set_blocking(sync_mode);
+
+    // Add MIP problem data
+    auto* mip_request = request.mutable_mip_request();
+
+    // Set header
+    auto* header = mip_request->mutable_header();
+    header->set_version(1);
+    header->set_problem_type(cuopt::remote::MIP);
+    header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64);
+    header->set_float_type(sizeof(f_t) == 4 ?
cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + // Convert problem to protobuf + problem_to_protobuf(problem, mip_request->mutable_problem()); + + cuopt::remote::ResultResponse result_response; + + if (sync_mode) { + // ===== SYNC MODE: Submit with blocking=true, get result directly ===== + fprintf(stderr, "[solve_mip_remote] Sending blocking request, waiting for solution...\n"); + result_response = submit_job_sync(host, port, request); + fprintf(stderr, "[solve_mip_remote] Solution received from sync request\n"); + + } else { + // ===== ASYNC MODE: Submit → Poll → Get Result → Delete ===== + std::string job_id = submit_job(host, port, request); + fprintf(stderr, "[solve_mip_remote] Job submitted, ID: %s\n", job_id.c_str()); + + // Poll until job completes + fprintf(stderr, "[solve_mip_remote] Polling for completion...\n"); + poll_until_complete(host, port, job_id); + + // Get result + fprintf(stderr, "[solve_mip_remote] Job complete, retrieving result...\n"); + result_response = get_result(host, port, job_id); + + // Delete job + delete_job(host, port, job_id); + fprintf(stderr, "[solve_mip_remote] Job deleted from server\n"); + } + + // Check result status + if (result_response.status() != cuopt::remote::SUCCESS) { + throw std::runtime_error("Remote solve failed: " + result_response.error_message()); + } + + if (!result_response.has_mip_solution()) { + throw std::runtime_error("ResultResponse does not contain MIP solution"); + } + + fprintf(stderr, "[solve_mip_remote] Solution received successfully\n"); + + // Convert protobuf solution to C++ solution (printing happens inside) + return protobuf_to_mip_solution(result_response.mip_solution()); + + } catch (...) { + throw; + } +} + +// Explicit template instantiations for double precision +#if MIP_INSTANTIATE_DOUBLE +template optimization_problem_solution_t solve_lp_remote( + const optimization_problem_t&, const pdlp_solver_settings_t&); + +template mip_solution_t solve_mip_remote(const optimization_problem_t&, + const mip_solver_settings_t&); +#endif + +// Explicit template instantiations for float precision (if enabled) +#if MIP_INSTANTIATE_FLOAT +template optimization_problem_solution_t solve_lp_remote( + const optimization_problem_t&, const pdlp_solver_settings_t&); + +template mip_solution_t solve_mip_remote(const optimization_problem_t&, + const mip_solver_settings_t&); +#endif + +} // namespace cuopt::linear_programming diff --git a/cpp/src/mip/presolve/third_party_presolve.cpp b/cpp/src/mip/presolve/third_party_presolve.cpp index 2747fa037..4b2a386f3 100644 --- a/cpp/src/mip/presolve/third_party_presolve.cpp +++ b/cpp/src/mip/presolve/third_party_presolve.cpp @@ -36,7 +36,7 @@ static papilo::PostsolveStorage post_solve_storage_; static bool maximize_ = false; template -papilo::Problem build_papilo_problem(const optimization_problem_t& op_problem, +papilo::Problem build_papilo_problem(const gpu_optimization_problem_t& op_problem, problem_category_t category) { raft::common::nvtx::range fun_scope("Build papilo problem"); @@ -194,11 +194,11 @@ papilo::Problem build_papilo_problem(const optimization_problem_t } template -optimization_problem_t build_optimization_problem( +gpu_optimization_problem_t build_optimization_problem( papilo::Problem const& papilo_problem, raft::handle_t const* handle_ptr) { raft::common::nvtx::range fun_scope("Build optimization problem"); - optimization_problem_t op_problem(handle_ptr); + gpu_optimization_problem_t op_problem(handle_ptr); auto obj = papilo_problem.getObjective(); 
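+  // maximize_ was recorded when the PaPILO problem was built from the original model;
+  // the objective offset is negated below for maximization problems to undo that conversion.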
op_problem.set_objective_offset(maximize_ ? -obj.offset : obj.offset); @@ -391,8 +391,8 @@ void set_presolve_parameters(papilo::Presolve& presolver, } template -std::pair, bool> third_party_presolve_t::apply( - optimization_problem_t const& op_problem, +std::pair, bool> third_party_presolve_t::apply( + gpu_optimization_problem_t const& op_problem, problem_category_t category, bool dual_postsolve, f_t absolute_tolerance, @@ -423,7 +423,7 @@ std::pair, bool> third_party_presolve_t(op_problem.get_handle_ptr()), false); + return std::make_pair(gpu_optimization_problem_t(op_problem.get_handle_ptr()), false); } post_solve_storage_ = result.postsolve; CUOPT_LOG_INFO("Presolve removed: %d constraints, %d variables, %d nonzeros", diff --git a/cpp/src/mip/presolve/third_party_presolve.hpp b/cpp/src/mip/presolve/third_party_presolve.hpp index 6e5092de2..e70af84bc 100644 --- a/cpp/src/mip/presolve/third_party_presolve.hpp +++ b/cpp/src/mip/presolve/third_party_presolve.hpp @@ -17,6 +17,7 @@ #pragma once +#include #include namespace cuopt::linear_programming::detail { @@ -26,8 +27,8 @@ class third_party_presolve_t { public: third_party_presolve_t() = default; - std::pair, bool> apply( - optimization_problem_t const& op_problem, + std::pair, bool> apply( + gpu_optimization_problem_t const& op_problem, problem_category_t category, bool dual_postsolve, f_t absolute_tolerance, diff --git a/cpp/src/mip/problem/presolve_data.cuh b/cpp/src/mip/problem/presolve_data.cuh index e9ba5f3b3..2a3e792f4 100644 --- a/cpp/src/mip/problem/presolve_data.cuh +++ b/cpp/src/mip/problem/presolve_data.cuh @@ -17,6 +17,7 @@ #pragma once +#include #include #include @@ -32,7 +33,7 @@ class problem_t; template class presolve_data_t { public: - presolve_data_t(const optimization_problem_t& problem, rmm::cuda_stream_view stream) + presolve_data_t(const gpu_optimization_problem_t& problem, rmm::cuda_stream_view stream) : variable_offsets(problem.get_n_variables(), 0), additional_var_used(problem.get_n_variables(), false), additional_var_id_per_var(problem.get_n_variables(), -1), diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 4c9deeda0..33eef2bc1 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -53,7 +53,7 @@ namespace cuopt::linear_programming::detail { template -void problem_t::op_problem_cstr_body(const optimization_problem_t& problem_) +void problem_t::op_problem_cstr_body(const gpu_optimization_problem_t& problem_) { // Mark the problem as empty if the op_problem has an empty matrix. 
if (problem_.get_constraint_matrix_values().is_empty()) { @@ -103,7 +103,7 @@ void problem_t::op_problem_cstr_body(const optimization_problem_t problem_t::problem_t( - const optimization_problem_t& problem_, + const gpu_optimization_problem_t& problem_, const typename mip_solver_settings_t::tolerances_t tolerances_) : original_problem_ptr(&problem_), handle_ptr(problem_.get_handle_ptr()), diff --git a/cpp/src/mip/problem/problem.cuh b/cpp/src/mip/problem/problem.cuh index c210c3cfa..890583c12 100644 --- a/cpp/src/mip/problem/problem.cuh +++ b/cpp/src/mip/problem/problem.cuh @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -57,7 +58,7 @@ constexpr bool USE_REL_TOLERANCE = true; template class problem_t { public: - problem_t(const optimization_problem_t& problem, + problem_t(const gpu_optimization_problem_t& problem, const typename mip_solver_settings_t::tolerances_t tolerances_ = {}); problem_t() = delete; // copy constructor @@ -65,7 +66,7 @@ class problem_t { problem_t(const problem_t& problem, bool no_deep_copy); problem_t(problem_t&& problem) = default; problem_t& operator=(problem_t&&) = default; - void op_problem_cstr_body(const optimization_problem_t& problem_); + void op_problem_cstr_body(const gpu_optimization_problem_t& problem_); problem_t get_problem_after_fixing_vars( rmm::device_uvector& assignment, @@ -203,7 +204,7 @@ class problem_t { view_t view(); - const optimization_problem_t* original_problem_ptr; + const gpu_optimization_problem_t* original_problem_ptr; const raft::handle_t* handle_ptr; std::shared_ptr> integer_fixed_problem = nullptr; rmm::device_uvector integer_fixed_variable_map; diff --git a/cpp/src/mip/relaxed_lp/relaxed_lp.cu b/cpp/src/mip/relaxed_lp/relaxed_lp.cu index a32b3cf82..52165f66a 100644 --- a/cpp/src/mip/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip/relaxed_lp/relaxed_lp.cu @@ -98,7 +98,20 @@ optimization_problem_solution_t get_relaxed_lp_solution( if (solver_response.get_primal_solution().size() != 0 && solver_response.get_dual_solution().size() != 0 && settings.save_state) { CUOPT_LOG_DEBUG("saving initial primal solution of size %d", lp_state.prev_primal.size()); - lp_state.set_state(solver_response.get_primal_solution(), solver_response.get_dual_solution()); + // Solution is already on host, copy to device for lp_state + rmm::device_uvector device_primal(solver_response.get_primal_solution().size(), + op_problem.handle_ptr->get_stream()); + rmm::device_uvector device_dual(solver_response.get_dual_solution().size(), + op_problem.handle_ptr->get_stream()); + raft::copy(device_primal.data(), + solver_response.get_primal_solution().data(), + solver_response.get_primal_solution().size(), + op_problem.handle_ptr->get_stream()); + raft::copy(device_dual.data(), + solver_response.get_dual_solution().data(), + solver_response.get_dual_solution().size(), + op_problem.handle_ptr->get_stream()); + lp_state.set_state(device_primal, device_dual); } if (solver_response.get_primal_solution().size() != 0) { // copy the solution no matter what, because in the worst case we are closer to the polytope diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu index cdffa37b0..adffccef9 100644 --- a/cpp/src/mip/solution/solution.cu +++ b/cpp/src/mip/solution/solution.cu @@ -631,7 +631,16 @@ mip_solution_t solution_t::get_solution(bool output_feasible auto term_reason = not_optimal ? 
mip_termination_status_t::FeasibleFound : mip_termination_status_t::Optimal; if (is_problem_fully_reduced) { term_reason = mip_termination_status_t::Optimal; } - return mip_solution_t(std::move(assignment), + + // Convert device solution to host memory + std::vector host_assignment(assignment.size()); + raft::copy(host_assignment.data(), + assignment.data(), + assignment.size(), + handle_ptr->get_stream().value()); + handle_ptr->sync_stream(); + + return mip_solution_t(std::move(host_assignment), problem_ptr->var_names, h_user_obj, rel_mip_gap, @@ -643,8 +652,7 @@ mip_solution_t solution_t::get_solution(bool output_feasible } else { return mip_solution_t{is_problem_fully_reduced ? mip_termination_status_t::Infeasible : mip_termination_status_t::TimeLimit, - stats, - handle_ptr->get_stream()}; + stats}; } } diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index 974cde0bc..90b093b66 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -33,10 +33,13 @@ #include #include +#include #include #include #include #include +#include +#include #include @@ -149,9 +152,36 @@ mip_solution_t run_mip(detail::problem_t& problem, } template -mip_solution_t solve_mip(optimization_problem_t& op_problem, +mip_solution_t solve_mip(optimization_problem_t& host_problem, mip_solver_settings_t const& settings) { + // Check for remote solve environment variables + const char* remote_host = std::getenv("CUOPT_REMOTE_HOST"); + const char* remote_port = std::getenv("CUOPT_REMOTE_PORT"); + + if (remote_host != nullptr && remote_port != nullptr) { + std::fprintf(stderr, + "[solve_mip] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%s\n", + remote_host, + remote_port); + std::fflush(stderr); + + try { + return solve_mip_remote(host_problem, settings); + } catch (const std::exception& e) { + std::fprintf(stderr, "[solve_mip] Remote solve failed: %s\n", e.what()); + std::fprintf(stderr, "[solve_mip] Falling back to local solve\n"); + std::fflush(stderr); + // Fall through to local solve + } + } + + // Create RAFT handle for local GPU solve + raft::handle_t handle; + + // Convert host problem to GPU problem for internal solving + auto gpu_problem = host_to_gpu_problem(&handle, host_problem); + try { constexpr f_t max_time_limit = 1000000000; f_t time_limit = @@ -167,36 +197,35 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, init_logger_t log(settings.log_file, settings.log_to_console); // Init libraies before to not include it in solve time // This needs to be called before pdlp is initialized - init_handler(op_problem.get_handle_ptr()); + init_handler(gpu_problem.get_handle_ptr()); print_version_info(); raft::common::nvtx::range fun_scope("Running solver"); // This is required as user might forget to set some fields - problem_checking_t::check_problem_representation(op_problem); - problem_checking_t::check_initial_solution_representation(op_problem, settings); + problem_checking_t::check_problem_representation(gpu_problem); + problem_checking_t::check_initial_solution_representation(gpu_problem, settings); CUOPT_LOG_INFO( "Solving a problem with %d constraints, %d variables (%d integers), and %d nonzeros", - op_problem.get_n_constraints(), - op_problem.get_n_variables(), - op_problem.get_n_integers(), - op_problem.get_nnz()); - op_problem.print_scaling_information(); + gpu_problem.get_n_constraints(), + gpu_problem.get_n_variables(), + gpu_problem.get_n_integers(), + gpu_problem.get_nnz()); + gpu_problem.print_scaling_information(); // Check for crossing bounds. 
Return infeasible if there are any - if (problem_checking_t::has_crossing_bounds(op_problem)) { + if (problem_checking_t::has_crossing_bounds(gpu_problem)) { return mip_solution_t(mip_termination_status_t::Infeasible, - solver_stats_t{}, - op_problem.get_handle_ptr()->get_stream()); + solver_stats_t{}); } auto timer = cuopt::timer_t(time_limit); double presolve_time = 0.0; std::unique_ptr> presolver; - detail::problem_t problem(op_problem, settings.get_tolerances()); + detail::problem_t problem(gpu_problem, settings.get_tolerances()); auto run_presolve = settings.presolve; run_presolve = run_presolve && settings.get_mip_callbacks().empty(); @@ -209,8 +238,8 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, const double presolve_time_limit = std::min(0.1 * time_limit, 60.0); const bool dual_postsolve = false; presolver = std::make_unique>(); - auto [reduced_op_problem, feasible] = - presolver->apply(op_problem, + auto [reduced_gpu_problem, feasible] = + presolver->apply(gpu_problem, cuopt::linear_programming::problem_category_t::MIP, dual_postsolve, settings.tolerances.absolute_tolerance, @@ -219,21 +248,20 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, settings.num_cpu_threads); if (!feasible) { return mip_solution_t(mip_termination_status_t::Infeasible, - solver_stats_t{}, - op_problem.get_handle_ptr()->get_stream()); + solver_stats_t{}); } - problem = detail::problem_t(reduced_op_problem); + problem = detail::problem_t(reduced_gpu_problem); presolve_time = timer.elapsed_time(); CUOPT_LOG_INFO("Papilo presolve time: %f", presolve_time); } if (settings.user_problem_file != "") { CUOPT_LOG_INFO("Writing user problem to file: %s", settings.user_problem_file.c_str()); - op_problem.write_to_mps(settings.user_problem_file); + gpu_problem.write_to_mps(settings.user_problem_file); } // this is for PDLP, i think this should be part of pdlp solver - setup_device_symbols(op_problem.get_handle_ptr()->get_stream()); + setup_device_symbols(gpu_problem.get_handle_ptr()->get_stream()); auto sol = run_mip(problem, settings, timer); @@ -241,25 +269,25 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, auto status_to_skip = sol.get_termination_status() == mip_termination_status_t::TimeLimit || sol.get_termination_status() == mip_termination_status_t::Infeasible; auto primal_solution = - cuopt::device_copy(sol.get_solution(), op_problem.get_handle_ptr()->get_stream()); - rmm::device_uvector dual_solution(0, op_problem.get_handle_ptr()->get_stream()); - rmm::device_uvector reduced_costs(0, op_problem.get_handle_ptr()->get_stream()); + cuopt::device_copy(sol.get_solution(), gpu_problem.get_handle_ptr()->get_stream()); + rmm::device_uvector dual_solution(0, gpu_problem.get_handle_ptr()->get_stream()); + rmm::device_uvector reduced_costs(0, gpu_problem.get_handle_ptr()->get_stream()); presolver->undo(primal_solution, dual_solution, reduced_costs, cuopt::linear_programming::problem_category_t::MIP, status_to_skip, - op_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->get_stream()); if (!status_to_skip) { - thrust::fill(rmm::exec_policy(op_problem.get_handle_ptr()->get_stream()), + thrust::fill(rmm::exec_policy(gpu_problem.get_handle_ptr()->get_stream()), dual_solution.data(), dual_solution.data() + dual_solution.size(), std::numeric_limits::signaling_NaN()); - thrust::fill(rmm::exec_policy(op_problem.get_handle_ptr()->get_stream()), + thrust::fill(rmm::exec_policy(gpu_problem.get_handle_ptr()->get_stream()), reduced_costs.data(), 
reduced_costs.data() + reduced_costs.size(), std::numeric_limits::signaling_NaN()); - detail::problem_t full_problem(op_problem); + detail::problem_t full_problem(gpu_problem); detail::solution_t full_sol(full_problem); full_sol.copy_new_assignment(cuopt::host_copy(primal_solution)); full_sol.compute_feasibility(); @@ -280,27 +308,25 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, if (settings.sol_file != "") { CUOPT_LOG_INFO("Writing solution to file %s", settings.sol_file.c_str()); - sol.write_to_sol_file(settings.sol_file, op_problem.get_handle_ptr()->get_stream()); + sol.write_to_sol_file(settings.sol_file); } return sol; } catch (const cuopt::logic_error& e) { CUOPT_LOG_ERROR("Error in solve_mip: %s", e.what()); - return mip_solution_t{e, op_problem.get_handle_ptr()->get_stream()}; + return mip_solution_t{e}; } catch (const std::bad_alloc& e) { CUOPT_LOG_ERROR("Error in solve_mip: %s", e.what()); return mip_solution_t{ - cuopt::logic_error("Memory allocation failed", cuopt::error_type_t::RuntimeError), - op_problem.get_handle_ptr()->get_stream()}; + cuopt::logic_error("Memory allocation failed", cuopt::error_type_t::RuntimeError)}; } } template mip_solution_t solve_mip( - raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& mps_data_model, mip_solver_settings_t const& settings) { - auto op_problem = mps_data_model_to_optimization_problem(handle_ptr, mps_data_model); + auto op_problem = mps_data_model_to_optimization_problem(mps_data_model); return solve_mip(op_problem, settings); } @@ -310,7 +336,6 @@ mip_solution_t solve_mip( mip_solver_settings_t const& settings); \ \ template mip_solution_t solve_mip( \ - raft::handle_t const* handle_ptr, \ const cuopt::mps_parser::mps_data_model_t& mps_data_model, \ mip_solver_settings_t const& settings); diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 0114882b0..ffefef011 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -138,10 +138,11 @@ solution_t mip_solver_t::run_solver() settings.method = method_t::Concurrent; auto opt_sol = solve_lp_with_method( - *context.problem_ptr->original_problem_ptr, *context.problem_ptr, settings, lp_timer); + *context.problem_ptr->original_problem_ptr, *context.problem_ptr, settings, lp_timer, false); solution_t sol(*context.problem_ptr); - sol.copy_new_assignment(host_copy(opt_sol.get_primal_solution())); + // Solution is already on host, no need for host_copy + sol.copy_new_assignment(opt_sol.get_primal_solution()); if (opt_sol.get_termination_status() == pdlp_termination_status_t::Optimal || opt_sol.get_termination_status() == pdlp_termination_status_t::PrimalInfeasible || opt_sol.get_termination_status() == pdlp_termination_status_t::DualInfeasible) { diff --git a/cpp/src/mip/solver_solution.cu b/cpp/src/mip/solver_solution.cu index f1f00dc80..0dd19d565 100644 --- a/cpp/src/mip/solver_solution.cu +++ b/cpp/src/mip/solver_solution.cu @@ -28,7 +28,7 @@ namespace cuopt::linear_programming { template -mip_solution_t::mip_solution_t(rmm::device_uvector solution, +mip_solution_t::mip_solution_t(std::vector solution, std::vector var_names, f_t objective, f_t mip_gap, @@ -37,7 +37,7 @@ mip_solution_t::mip_solution_t(rmm::device_uvector solution, f_t max_int_violation, f_t max_variable_bound_violation, solver_stats_t stats, - std::vector> solution_pool) + std::vector> solution_pool) : solution_(std::move(solution)), var_names_(std::move(var_names)), objective_(objective), @@ -54,9 +54,8 @@ mip_solution_t::mip_solution_t(rmm::device_uvector 
solution, template mip_solution_t::mip_solution_t(mip_termination_status_t termination_status, - solver_stats_t stats, - rmm::cuda_stream_view stream_view) - : solution_(0, stream_view), + solver_stats_t stats) + : solution_(), objective_(0), mip_gap_(0), termination_status_(termination_status), @@ -69,9 +68,8 @@ mip_solution_t::mip_solution_t(mip_termination_status_t termination_st } template -mip_solution_t::mip_solution_t(const cuopt::logic_error& error_status, - rmm::cuda_stream_view stream_view) - : solution_(0, stream_view), +mip_solution_t::mip_solution_t(const cuopt::logic_error& error_status) + : solution_(), objective_(0), mip_gap_(0), termination_status_(mip_termination_status_t::NoTermination), @@ -89,13 +87,13 @@ const cuopt::logic_error& mip_solution_t::get_error_status() const } template -const rmm::device_uvector& mip_solution_t::get_solution() const +const std::vector& mip_solution_t::get_solution() const { return solution_; } template -rmm::device_uvector& mip_solution_t::get_solution() +std::vector& mip_solution_t::get_solution() { return solution_; } @@ -202,14 +200,13 @@ const std::vector& mip_solution_t::get_variable_names() c } template -const std::vector>& mip_solution_t::get_solution_pool() const +const std::vector>& mip_solution_t::get_solution_pool() const { return solution_pool_; } template -void mip_solution_t::write_to_sol_file(std::string_view filename, - rmm::cuda_stream_view stream_view) const +void mip_solution_t::write_to_sol_file(std::string_view filename) const { std::string status = get_termination_status_string(); // Override for no termination @@ -220,13 +217,9 @@ void mip_solution_t::write_to_sol_file(std::string_view filename, double objective_value = get_objective_value(); auto& var_names = get_variable_names(); - std::vector solution; - solution.resize(solution_.size()); - raft::copy(solution.data(), solution_.data(), solution_.size(), stream_view.value()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); - + // Solution is already on host, no need to copy solution_writer_t::write_solution_to_sol_file( - std::string(filename), status, objective_value, var_names, solution); + std::string(filename), status, objective_value, var_names, solution_); } template @@ -243,6 +236,26 @@ void mip_solution_t::log_summary() const CUOPT_LOG_INFO("Total Solve Time: %f", get_total_solve_time()); } +template +void mip_solution_t::print_solution_stats() const +{ + fprintf(stderr, + "Solution objective: %f , relative_mip_gap %f solution_bound %f presolve_time %f " + "total_solve_time %f " + "max constraint violation %f max int violation %f max var bounds violation %f " + "nodes %d simplex_iterations %d\n", + get_objective_value(), + get_mip_gap(), + get_solution_bound(), + get_presolve_time(), + get_total_solve_time(), + get_max_constraint_violation(), + get_max_int_violation(), + get_max_variable_bound_violation(), + get_num_nodes(), + get_num_simplex_iterations()); +} + #if MIP_INSTANTIATE_FLOAT template class mip_solution_t; #endif diff --git a/cpp/src/mip/utils.cuh b/cpp/src/mip/utils.cuh index 47f1bbc48..4f1267915 100644 --- a/cpp/src/mip/utils.cuh +++ b/cpp/src/mip/utils.cuh @@ -330,6 +330,17 @@ void print_solution(const raft::handle_t* handle_ptr, const rmm::device_uvector< CUOPT_LOG_DEBUG("%s]", log_str.c_str()); } +// Overload for host solution +template +void print_solution(const raft::handle_t* handle_ptr, const std::vector& solution) +{ + std::string log_str{"sol: ["}; + for (int i = 0; i < (int)solution.size(); i++) { + 
log_str.append(std::to_string(solution[i]) + ", "); + } + CUOPT_LOG_DEBUG("%s]", log_str.c_str()); +} + template bool has_nans(const raft::handle_t* handle_ptr, const rmm::device_uvector& vec) { diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 61b9115e4..dc3be1a4c 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -76,8 +77,7 @@ TEST(pdlp_class, run_double) auto solver_settings = pdlp_solver_settings_t{}; solver_settings.method = cuopt::linear_programming::method_t::PDLP; - optimization_problem_solution_t solution = - solve_lp(&handle_, op_problem, solver_settings); + optimization_problem_solution_t solution = solve_lp(op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); EXPECT_FALSE(is_incorrect_objective( afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); @@ -103,7 +103,7 @@ TEST(pdlp_class, run_double_very_low_accuracy) settings.tolerances.relative_gap_tolerance = 0.0; settings.method = cuopt::linear_programming::method_t::PDLP; - optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, settings); + optimization_problem_solution_t solution = solve_lp(op_problem, settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); EXPECT_FALSE(is_incorrect_objective( afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); @@ -124,8 +124,7 @@ TEST(pdlp_class, run_double_initial_solution) auto solver_settings = pdlp_solver_settings_t{}; solver_settings.method = cuopt::linear_programming::method_t::PDLP; - optimization_problem_solution_t solution = - solve_lp(&handle_, op_problem, solver_settings); + optimization_problem_solution_t solution = solve_lp(op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); EXPECT_FALSE(is_incorrect_objective( afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); @@ -147,7 +146,7 @@ TEST(pdlp_class, run_iteration_limit) settings.set_optimality_tolerance(0); settings.method = cuopt::linear_programming::method_t::PDLP; - optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, settings); + optimization_problem_solution_t solution = solve_lp(op_problem, settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_ITERATION_LIMIT); // By default we would return all 0, we now return what we currently have so not all 0 EXPECT_FALSE(thrust::all_of(handle_.get_thrust_policy(), @@ -172,7 +171,7 @@ TEST(pdlp_class, run_time_limit) settings.set_optimality_tolerance(0); settings.method = cuopt::linear_programming::method_t::PDLP; - optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, settings); + optimization_problem_solution_t solution = solve_lp(op_problem, settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_TIME_LIMIT); // By default we would return all 0, we now return what we currently have so not all 0 @@ -224,8 +223,7 @@ TEST(pdlp_class, run_sub_mittleman) settings.presolve = presolve; settings.method = cuopt::linear_programming::method_t::PDLP; const raft::handle_t handle_{}; - optimization_problem_solution_t solution = - solve_lp(&handle_, op_problem, settings); + optimization_problem_solution_t 
solution = solve_lp(op_problem, settings); printf("running %s mode %d presolve? %d\n", name.c_str(), (int)solver_mode, presolve); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); EXPECT_FALSE(is_incorrect_objective( @@ -257,9 +255,10 @@ TEST(pdlp_class, initial_solution_test) cuopt::mps_parser::mps_data_model_t mps_data_model = cuopt::mps_parser::parse_mps(path); - auto op_problem = cuopt::linear_programming::mps_data_model_to_optimization_problem( - &handle_, mps_data_model); - cuopt::linear_programming::detail::problem_t problem(op_problem); + auto op_problem = + cuopt::linear_programming::mps_data_model_to_optimization_problem(mps_data_model); + auto gpu_problem = host_to_gpu_problem(&handle_, op_problem); + cuopt::linear_programming::detail::problem_t problem(gpu_problem); auto solver_settings = pdlp_solver_settings_t{}; // We are just testing initial scaling on initial solution scheme so we don't care about solver @@ -537,9 +536,10 @@ TEST(pdlp_class, initial_primal_weight_step_size_test) cuopt::mps_parser::mps_data_model_t mps_data_model = cuopt::mps_parser::parse_mps(path); - auto op_problem = cuopt::linear_programming::mps_data_model_to_optimization_problem( - &handle_, mps_data_model); - cuopt::linear_programming::detail::problem_t problem(op_problem); + auto op_problem = + cuopt::linear_programming::mps_data_model_to_optimization_problem(mps_data_model); + auto gpu_problem = host_to_gpu_problem(&handle_, op_problem); + cuopt::linear_programming::detail::problem_t problem(gpu_problem); auto solver_settings = pdlp_solver_settings_t{}; // We are just testing initial scaling on initial solution scheme so we don't care about solver @@ -623,9 +623,10 @@ TEST(pdlp_class, initial_rhs_and_c) cuopt::mps_parser::mps_data_model_t mps_data_model = cuopt::mps_parser::parse_mps(path); - auto op_problem = cuopt::linear_programming::mps_data_model_to_optimization_problem( - &handle_, mps_data_model); - cuopt::linear_programming::detail::problem_t problem(op_problem); + auto op_problem = + cuopt::linear_programming::mps_data_model_to_optimization_problem(mps_data_model); + auto gpu_problem = host_to_gpu_problem(&handle_, op_problem); + cuopt::linear_programming::detail::problem_t problem(gpu_problem); cuopt::linear_programming::detail::pdlp_solver_t solver(problem); constexpr double test_initial_primal_factor = 1.0; @@ -649,7 +650,7 @@ TEST(pdlp_class, per_constraint_test) * will be 0.1009 */ raft::handle_t handle; - auto op_problem = optimization_problem_t(&handle); + auto op_problem = optimization_problem_t(); std::vector A_host = {1.0, 1.0, 1.0}; std::vector indices_host = {0, 1, 2}; @@ -670,7 +671,9 @@ TEST(pdlp_class, per_constraint_test) op_problem.set_constraint_upper_bounds(b_host.data(), b_host.size()); op_problem.set_objective_coefficients(b_host.data(), b_host.size()); - auto problem = cuopt::linear_programming::detail::problem_t(op_problem); + auto gpu_problem = host_to_gpu_problem(&handle, op_problem); + + auto problem = cuopt::linear_programming::detail::problem_t(gpu_problem); pdlp_solver_settings_t solver_settings; solver_settings.tolerances.relative_primal_tolerance = 0; // Shouldn't matter @@ -742,12 +745,10 @@ TEST(pdlp_class, best_primal_so_far_iteration) cuopt::mps_parser::mps_data_model_t op_problem2 = cuopt::mps_parser::parse_mps(path); - optimization_problem_solution_t solution1 = - solve_lp(&handle1, op_problem1, solver_settings); + optimization_problem_solution_t solution1 = solve_lp(op_problem1, solver_settings); 
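+  // First solve is the baseline; the second solve below enables save_best_primal_so_far
+  // and is expected to finish with a smaller l2 primal residual.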
RAFT_CUDA_TRY(cudaDeviceSynchronize()); - solver_settings.save_best_primal_so_far = true; - optimization_problem_solution_t solution2 = - solve_lp(&handle2, op_problem2, solver_settings); + solver_settings.save_best_primal_so_far = true; + optimization_problem_solution_t solution2 = solve_lp(op_problem2, solver_settings); RAFT_CUDA_TRY(cudaDeviceSynchronize()); EXPECT_TRUE(solution2.get_additional_termination_information().l2_primal_residual < @@ -772,12 +773,10 @@ TEST(pdlp_class, best_primal_so_far_time) cuopt::mps_parser::mps_data_model_t op_problem2 = cuopt::mps_parser::parse_mps(path); - optimization_problem_solution_t solution1 = - solve_lp(&handle1, op_problem1, solver_settings); + optimization_problem_solution_t solution1 = solve_lp(op_problem1, solver_settings); RAFT_CUDA_TRY(cudaDeviceSynchronize()); - solver_settings.save_best_primal_so_far = true; - optimization_problem_solution_t solution2 = - solve_lp(&handle2, op_problem2, solver_settings); + solver_settings.save_best_primal_so_far = true; + optimization_problem_solution_t solution2 = solve_lp(op_problem2, solver_settings); RAFT_CUDA_TRY(cudaDeviceSynchronize()); EXPECT_TRUE(solution2.get_additional_termination_information().l2_primal_residual < @@ -802,12 +801,10 @@ TEST(pdlp_class, first_primal_feasible) cuopt::mps_parser::mps_data_model_t op_problem2 = cuopt::mps_parser::parse_mps(path); - optimization_problem_solution_t solution1 = - solve_lp(&handle1, op_problem1, solver_settings); + optimization_problem_solution_t solution1 = solve_lp(op_problem1, solver_settings); RAFT_CUDA_TRY(cudaDeviceSynchronize()); - solver_settings.first_primal_feasible = true; - optimization_problem_solution_t solution2 = - solve_lp(&handle2, op_problem2, solver_settings); + solver_settings.first_primal_feasible = true; + optimization_problem_solution_t solution2 = solve_lp(op_problem2, solver_settings); RAFT_CUDA_TRY(cudaDeviceSynchronize()); EXPECT_EQ(solution1.get_termination_status(), pdlp_termination_status_t::IterationLimit); @@ -841,7 +838,7 @@ TEST(pdlp_class, warm_start) cuopt::mps_parser::parse_mps(path); auto op_problem1 = cuopt::linear_programming::mps_data_model_to_optimization_problem( - &handle, mps_data_model); + mps_data_model); // Solving from scratch until 1e-2 optimization_problem_solution_t solution1 = solve_lp(op_problem1, solver_settings); @@ -850,14 +847,14 @@ TEST(pdlp_class, warm_start) solver_settings.set_optimality_tolerance(1e-1); auto op_problem2 = cuopt::linear_programming::mps_data_model_to_optimization_problem( - &handle, mps_data_model); + mps_data_model); optimization_problem_solution_t solution2 = solve_lp(op_problem2, solver_settings); // Solving until 1e-2 using the previous state as a warm start solver_settings.set_optimality_tolerance(1e-2); auto op_problem3 = cuopt::linear_programming::mps_data_model_to_optimization_problem( - &handle, mps_data_model); + mps_data_model); solver_settings.set_pdlp_warm_start_data(solution2.get_pdlp_warm_start_data()); optimization_problem_solution_t solution3 = solve_lp(op_problem3, solver_settings); @@ -879,7 +876,7 @@ TEST(dual_simplex, afiro) cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(path, true); - optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, settings); + optimization_problem_solution_t solution = solve_lp(op_problem, settings); EXPECT_EQ(solution.get_termination_status(), pdlp_termination_status_t::Optimal); EXPECT_FALSE(is_incorrect_objective( afiro_primal_objective, 
solution.get_additional_termination_information().primal_objective)); @@ -897,8 +894,7 @@ TEST(pdlp_class, run_empty_matrix_pdlp) auto solver_settings = pdlp_solver_settings_t{}; solver_settings.method = cuopt::linear_programming::method_t::PDLP; - optimization_problem_solution_t solution = - solve_lp(&handle_, op_problem, solver_settings); + optimization_problem_solution_t solution = solve_lp(op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_NUMERICAL_ERROR); } @@ -914,8 +910,7 @@ TEST(pdlp_class, run_empty_matrix_dual_simplex) auto solver_settings = pdlp_solver_settings_t{}; solver_settings.method = cuopt::linear_programming::method_t::Concurrent; - optimization_problem_solution_t solution = - solve_lp(&handle_, op_problem, solver_settings); + optimization_problem_solution_t solution = solve_lp(op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); EXPECT_FALSE(solution.get_additional_termination_information().solved_by_pdlp); } @@ -932,8 +927,7 @@ TEST(pdlp_class, test_max) solver_settings.method = cuopt::linear_programming::method_t::PDLP; solver_settings.pdlp_solver_mode = cuopt::linear_programming::pdlp_solver_mode_t::Stable2; - optimization_problem_solution_t solution = - solve_lp(&handle_, op_problem, solver_settings); + optimization_problem_solution_t solution = solve_lp(op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); EXPECT_NEAR( solution.get_additional_termination_information().primal_objective, 17.0, factor_tolerance); @@ -950,8 +944,7 @@ TEST(pdlp_class, test_max_with_offset) auto solver_settings = pdlp_solver_settings_t{}; solver_settings.method = cuopt::linear_programming::method_t::PDLP; - optimization_problem_solution_t solution = - solve_lp(&handle_, op_problem, solver_settings); + optimization_problem_solution_t solution = solve_lp(op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); EXPECT_NEAR( solution.get_additional_termination_information().primal_objective, 0.0, factor_tolerance); @@ -967,8 +960,7 @@ TEST(pdlp_class, test_lp_no_constraints) auto solver_settings = pdlp_solver_settings_t{}; - optimization_problem_solution_t solution = - solve_lp(&handle_, op_problem, solver_settings); + optimization_problem_solution_t solution = solve_lp(op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); EXPECT_NEAR( solution.get_additional_termination_information().primal_objective, 1.0, factor_tolerance); diff --git a/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu b/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu index 13ade79c1..414bee239 100644 --- a/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu +++ b/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -34,6 +35,15 @@ namespace cuopt::linear_programming { +// Helper function to check problem representation (converts host problem to GPU problem first) +template +void check_problem_representation_helper(const optimization_problem_t& host_problem) +{ + raft::handle_t handle; + auto gpu_problem = host_to_gpu_problem(&handle, host_problem); + problem_checking_t::check_problem_representation(gpu_problem); +} + cuopt::mps_parser::mps_data_model_t read_from_mps(const std::string& 
file, bool fixed_mps_format = true) { @@ -116,8 +126,8 @@ TEST(optimization_problem_t, good_mps_file_comments) TEST(optimization_problem_t, test_set_get_fields) { - raft::handle_t handle; - auto problem = optimization_problem_t(&handle); + // optimization_problem_t now uses host memory (std::vector) - no need for handle or cudaMemcpy + auto problem = optimization_problem_t(); double A_host[] = {1.0, 2.0, 3.0}; int indices_host[] = {0, 1, 2}; @@ -127,97 +137,68 @@ TEST(optimization_problem_t, test_set_get_fields) double var_ub_host[] = {1.0, 1.1, 1.2}; double con_lb_host[] = {0.5, 0.6, 0.7}; double con_ub_host[] = {1.5, 1.6, 1.7}; - std::vector result(3); - std::vector result_int(3); problem.set_csr_constraint_matrix(A_host, 3, indices_host, 3, indices_host, 3); - // Test set_A_values - cudaMemcpy(result.data(), - problem.get_constraint_matrix_values().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); - EXPECT_NEAR(1.0, result[0], 1e-5); - EXPECT_NEAR(2.0, result[1], 1e-5); - EXPECT_NEAR(3.0, result[2], 1e-5); - - // Test A_indices - cudaMemcpy(result_int.data(), - problem.get_constraint_matrix_indices().data(), - 3 * sizeof(int), - cudaMemcpyDeviceToHost); - EXPECT_EQ(0, result_int[0]); - EXPECT_EQ(1, result_int[1]); - EXPECT_EQ(2, result_int[2]); - - // Test A_offsets_ - cudaMemcpy(result_int.data(), - problem.get_constraint_matrix_offsets().data(), - 3 * sizeof(int), - cudaMemcpyDeviceToHost); - EXPECT_EQ(0, result_int[0]); - EXPECT_EQ(1, result_int[1]); - EXPECT_EQ(2, result_int[2]); - - // Test b_ + // Test set_A_values - data is already on host + const auto& A_values = problem.get_constraint_matrix_values(); + EXPECT_NEAR(1.0, A_values[0], 1e-5); + EXPECT_NEAR(2.0, A_values[1], 1e-5); + EXPECT_NEAR(3.0, A_values[2], 1e-5); + + // Test A_indices - data is already on host + const auto& A_indices = problem.get_constraint_matrix_indices(); + EXPECT_EQ(0, A_indices[0]); + EXPECT_EQ(1, A_indices[1]); + EXPECT_EQ(2, A_indices[2]); + + // Test A_offsets_ - data is already on host + const auto& A_offsets = problem.get_constraint_matrix_offsets(); + EXPECT_EQ(0, A_offsets[0]); + EXPECT_EQ(1, A_offsets[1]); + EXPECT_EQ(2, A_offsets[2]); + + // Test b_ - data is already on host problem.set_constraint_bounds(b_host, 3); - cudaMemcpy(result.data(), - problem.get_constraint_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); - EXPECT_NEAR(4.0, result[0], 1e-5); - EXPECT_NEAR(5.0, result[1], 1e-5); - EXPECT_NEAR(6.0, result[2], 1e-5); - - // Test c_ + const auto& b = problem.get_constraint_bounds(); + EXPECT_NEAR(4.0, b[0], 1e-5); + EXPECT_NEAR(5.0, b[1], 1e-5); + EXPECT_NEAR(6.0, b[2], 1e-5); + + // Test c_ - data is already on host problem.set_objective_coefficients(c_host, 3); - cudaMemcpy(result.data(), - problem.get_objective_coefficients().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); - EXPECT_NEAR(7.0, result[0], 1e-5); - EXPECT_NEAR(8.0, result[1], 1e-5); - EXPECT_NEAR(9.0, result[2], 1e-5); - - // Test variable_lower_bounds_ + const auto& c = problem.get_objective_coefficients(); + EXPECT_NEAR(7.0, c[0], 1e-5); + EXPECT_NEAR(8.0, c[1], 1e-5); + EXPECT_NEAR(9.0, c[2], 1e-5); + + // Test variable_lower_bounds_ - data is already on host problem.set_variable_lower_bounds(var_lb_host, 3); - cudaMemcpy(result.data(), - problem.get_variable_lower_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); - EXPECT_NEAR(0.0, result[0], 1e-5); - EXPECT_NEAR(0.1, result[1], 1e-5); - EXPECT_NEAR(0.2, result[2], 1e-5); - - // Test variable_upper_bounds_ + const 
auto& var_lb = problem.get_variable_lower_bounds(); + EXPECT_NEAR(0.0, var_lb[0], 1e-5); + EXPECT_NEAR(0.1, var_lb[1], 1e-5); + EXPECT_NEAR(0.2, var_lb[2], 1e-5); + + // Test variable_upper_bounds_ - data is already on host problem.set_variable_upper_bounds(var_ub_host, 3); - cudaMemcpy(result.data(), - problem.get_variable_upper_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); - EXPECT_NEAR(1.0, result[0], 1e-5); - EXPECT_NEAR(1.1, result[1], 1e-5); - EXPECT_NEAR(1.2, result[2], 1e-5); - - // Test constraint_lower_bounds_ + const auto& var_ub = problem.get_variable_upper_bounds(); + EXPECT_NEAR(1.0, var_ub[0], 1e-5); + EXPECT_NEAR(1.1, var_ub[1], 1e-5); + EXPECT_NEAR(1.2, var_ub[2], 1e-5); + + // Test constraint_lower_bounds_ - data is already on host problem.set_constraint_lower_bounds(con_lb_host, 3); - cudaMemcpy(result.data(), - problem.get_constraint_lower_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); - EXPECT_NEAR(0.5, result[0], 1e-5); - EXPECT_NEAR(0.6, result[1], 1e-5); - EXPECT_NEAR(0.7, result[2], 1e-5); - - // Test constraint_upper_bounds_ + const auto& con_lb = problem.get_constraint_lower_bounds(); + EXPECT_NEAR(0.5, con_lb[0], 1e-5); + EXPECT_NEAR(0.6, con_lb[1], 1e-5); + EXPECT_NEAR(0.7, con_lb[2], 1e-5); + + // Test constraint_upper_bounds_ - data is already on host problem.set_constraint_upper_bounds(con_ub_host, 3); - cudaMemcpy(result.data(), - problem.get_constraint_upper_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); - EXPECT_NEAR(1.5, result[0], 1e-5); - EXPECT_NEAR(1.6, result[1], 1e-5); - EXPECT_NEAR(1.7, result[2], 1e-5); + const auto& con_ub = problem.get_constraint_upper_bounds(); + EXPECT_NEAR(1.5, con_ub[0], 1e-5); + EXPECT_NEAR(1.6, con_ub[1], 1e-5); + EXPECT_NEAR(1.7, con_ub[2], 1e-5); // Test objective_scaling_factor_ double obj_scale = 1.5; @@ -256,12 +237,10 @@ TEST(optimization_problem_t, test_set_get_fields) TEST(optimization_problem_t, test_check_problem_validity) { - raft::handle_t handle; - auto op_problem_ = optimization_problem_t(&handle); + auto op_problem_ = optimization_problem_t(); // Test if exception is thrown when A_CSR_matrix are not set - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - cuopt::logic_error); + EXPECT_THROW(check_problem_representation_helper(op_problem_), cuopt::logic_error); // Set A_CSR_matrix double A_host[] = {1.0}; @@ -270,8 +249,7 @@ TEST(optimization_problem_t, test_check_problem_validity) op_problem_.set_csr_constraint_matrix(A_host, 1, indices_host, 1, offset_host, 2); // Test if exception is thrown when c is not set - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - cuopt::logic_error); + EXPECT_THROW(check_problem_representation_helper(op_problem_), cuopt::logic_error); // Test that n_vars is not set EXPECT_EQ(op_problem_.get_n_variables(), 0); @@ -281,8 +259,7 @@ TEST(optimization_problem_t, test_check_problem_validity) op_problem_.set_objective_coefficients(c_host, 1); // Test if exception is thrown when constraints are not set - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - cuopt::logic_error); + EXPECT_THROW(check_problem_representation_helper(op_problem_), cuopt::logic_error); // Test that n_vars is now set EXPECT_EQ(op_problem_.get_n_variables(), 1); @@ -295,8 +272,7 @@ TEST(optimization_problem_t, test_check_problem_validity) op_problem_.set_row_types(row_type_host, 1); // Test if exception is thrown when row_type is set but not b - 
EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - cuopt::logic_error); + EXPECT_THROW(check_problem_representation_helper(op_problem_), cuopt::logic_error); // Test that n_constraints is now set EXPECT_EQ(op_problem_.get_n_constraints(), 1); @@ -306,7 +282,7 @@ TEST(optimization_problem_t, test_check_problem_validity) op_problem_.set_constraint_bounds(b_host, 1); // Test that nothing is thrown when both b and row types are set - EXPECT_NO_THROW((problem_checking_t::check_problem_representation(op_problem_))); + EXPECT_NO_THROW(check_problem_representation_helper(op_problem_)); // Unsetting row types and constraints bounds op_problem_.set_row_types(row_type_host, 0); @@ -316,8 +292,7 @@ TEST(optimization_problem_t, test_check_problem_validity) EXPECT_EQ(op_problem_.get_n_constraints(), 0); // Test again if exception is thrown when constraints bounds are not set - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - cuopt::logic_error); + EXPECT_THROW(check_problem_representation_helper(op_problem_), cuopt::logic_error); // Seting constraint lower bounds double constraint_lower_bounds_host[] = {1.0}; @@ -327,21 +302,19 @@ TEST(optimization_problem_t, test_check_problem_validity) EXPECT_EQ(op_problem_.get_n_constraints(), 1); // Test if exception is thrown when upper constraints bounds are not set - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - cuopt::logic_error); + EXPECT_THROW(check_problem_representation_helper(op_problem_), cuopt::logic_error); // Seting constraint upper bounds double constraint_upper_bounds_host[] = {1.0}; op_problem_.set_constraint_upper_bounds(constraint_upper_bounds_host, 1); // Test if no exception is thrown when constraints bounds are set - EXPECT_NO_THROW((problem_checking_t::check_problem_representation(op_problem_))); + EXPECT_NO_THROW(check_problem_representation_helper(op_problem_)); } TEST(optimization_problem_t, test_csr_validity) { - raft::handle_t handle; - auto op_problem_ = optimization_problem_t(&handle); + auto op_problem_ = optimization_problem_t(); double A_host[] = {1.0, 1.0}; int indices_host[] = {0, 0}; int offset_host[] = {0, 1, 2}; @@ -351,46 +324,41 @@ TEST(optimization_problem_t, test_csr_validity) char row_type_host[] = {'E', 'E'}; op_problem_.set_row_types(row_type_host, 2); // Valid problem - EXPECT_NO_THROW((problem_checking_t::check_problem_representation(op_problem_))); + EXPECT_NO_THROW((check_problem_representation_helper(op_problem_))); // Test case 0: A_indices and A_values have different size { int incorrect_indices_size[] = {0}; op_problem_.set_csr_constraint_matrix(A_host, 2, incorrect_indices_size, 1, offset_host, 3); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_)), cuopt::logic_error); } // Test case 1: A_offsets first value not 0 { int incorrect_first_offset[] = {1, 1, 2}; op_problem_.set_csr_constraint_matrix(A_host, 2, indices_host, 2, incorrect_first_offset, 3); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_)), cuopt::logic_error); } // Test case 2: A_offsets not in increasing order { int unsorted_offsets[] = {0, 2, 1}; op_problem_.set_csr_constraint_matrix(A_host, 2, indices_host, 2, unsorted_offsets, 3); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - 
cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_)), cuopt::logic_error); } // Test case 3: A_indices value is negative { int negative_indices_host[] = {0, -1}; op_problem_.set_csr_constraint_matrix(A_host, 2, negative_indices_host, 2, offset_host, 3); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_)), cuopt::logic_error); } // Test case 4: A_indices value is greater than number of vars { int too_big_indices_host[] = {0, 1}; op_problem_.set_csr_constraint_matrix(A_host, 2, too_big_indices_host, 2, offset_host, 3); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_)), - cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_)), cuopt::logic_error); } } @@ -399,7 +367,7 @@ TEST(optimization_problem_t, test_row_type_invalidity_char) raft::handle_t handle; // Constraints set through row types - auto op_problem_1 = optimization_problem_t(&handle); + auto op_problem_1 = optimization_problem_t(); double A_host[] = {1.0, 1.0, 1.0}; int indices_host[] = {0, 0, 0}; int offset_host[] = {0, 1, 2, 3}; @@ -409,8 +377,7 @@ TEST(optimization_problem_t, test_row_type_invalidity_char) char row_type_host[] = {'E', 'L', 'N'}; op_problem_1.set_row_types(row_type_host, 3); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_1)), - cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_1)), cuopt::logic_error); } TEST(optimization_problem_t, test_row_type_invalidity_size) @@ -418,7 +385,7 @@ TEST(optimization_problem_t, test_row_type_invalidity_size) raft::handle_t handle; // Constraints set through row types - auto op_problem_1 = optimization_problem_t(&handle); + auto op_problem_1 = optimization_problem_t(); double A_host[] = {1.0, 1.0, 1.0}; int indices_host[] = {0, 0, 0}; int offset_host[] = {0, 1, 2, 3}; @@ -428,18 +395,17 @@ TEST(optimization_problem_t, test_row_type_invalidity_size) char row_type_host[] = {'E', 'L', 'L'}; op_problem_1.set_row_types(row_type_host, 2); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_1)), - cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_1)), cuopt::logic_error); op_problem_1.set_row_types(row_type_host, 3); - EXPECT_NO_THROW((problem_checking_t::check_problem_representation(op_problem_1))); + EXPECT_NO_THROW((check_problem_representation_helper(op_problem_1))); } TEST(optimization_problem_t, test_variable_invalidity_size) { raft::handle_t handle; - auto op_problem_1 = optimization_problem_t(&handle); + auto op_problem_1 = optimization_problem_t(); double A_host[] = {1.0, 1.0, 1.0}; int indices_host[] = {0, 0, 0}; int offset_host[] = {0, 1, 2, 3}; @@ -450,25 +416,23 @@ TEST(optimization_problem_t, test_variable_invalidity_size) op_problem_1.set_objective_coefficients(A_host, 1); op_problem_1.set_variable_lower_bounds(A_host, 2); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_1)), - cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_1)), cuopt::logic_error); op_problem_1.set_variable_lower_bounds(A_host, 1); - EXPECT_NO_THROW((problem_checking_t::check_problem_representation(op_problem_1))); + EXPECT_NO_THROW((check_problem_representation_helper(op_problem_1))); op_problem_1.set_variable_upper_bounds(A_host, 2); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_1)), - 
cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_1)), cuopt::logic_error); op_problem_1.set_variable_upper_bounds(A_host, 1); - EXPECT_NO_THROW((problem_checking_t::check_problem_representation(op_problem_1))); + EXPECT_NO_THROW((check_problem_representation_helper(op_problem_1))); } TEST(optimization_problem_t, test_constraints_invalidity_size) { raft::handle_t handle; - auto op_problem_1 = optimization_problem_t(&handle); + auto op_problem_1 = optimization_problem_t(); double A_host[] = {1.0, 1.0, 1.0}; int indices_host[] = {0, 0, 0}; int offset_host[] = {0, 1, 2, 3}; @@ -478,15 +442,13 @@ TEST(optimization_problem_t, test_constraints_invalidity_size) op_problem_1.set_constraint_upper_bounds(A_host, 2); op_problem_1.set_objective_coefficients(A_host, 1); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_1)), - cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_1)), cuopt::logic_error); op_problem_1.set_constraint_lower_bounds(A_host, 3); - EXPECT_THROW((problem_checking_t::check_problem_representation(op_problem_1)), - cuopt::logic_error); + EXPECT_THROW((check_problem_representation_helper(op_problem_1)), cuopt::logic_error); op_problem_1.set_constraint_upper_bounds(A_host, 3); - EXPECT_NO_THROW((problem_checking_t::check_problem_representation(op_problem_1))); + EXPECT_NO_THROW((check_problem_representation_helper(op_problem_1))); } TEST(optimization_problem_t, good_mps_mip_file_1) diff --git a/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu b/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu index ece05832f..508f0bcb1 100644 --- a/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu +++ b/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu @@ -96,22 +96,16 @@ TEST(SolverSettingsTest, warm_start_smaller_vector) std::vector primal_expected = {1.0, 0.0}; std::vector dual_expected = {0.0, 2.0, 1.0}; - rmm::device_uvector current_primal_solution = - cuopt::device_copy(primal, handle_.get_stream()); - rmm::device_uvector initial_primal_average = - cuopt::device_copy(primal, handle_.get_stream()); - rmm::device_uvector current_ATY = cuopt::device_copy(primal, handle_.get_stream()); - rmm::device_uvector sum_primal_solutions = - cuopt::device_copy(primal, handle_.get_stream()); - rmm::device_uvector last_restart_duality_gap_primal_solution = - cuopt::device_copy(primal, handle_.get_stream()); - - rmm::device_uvector current_dual_solution = - cuopt::device_copy(dual, handle_.get_stream()); - rmm::device_uvector initial_dual_average = cuopt::device_copy(dual, handle_.get_stream()); - rmm::device_uvector sum_dual_solutions = cuopt::device_copy(dual, handle_.get_stream()); - rmm::device_uvector last_restart_duality_gap_dual_solution = - cuopt::device_copy(dual, handle_.get_stream()); + std::vector current_primal_solution(primal); + std::vector initial_primal_average(primal); + std::vector current_ATY(primal); + std::vector sum_primal_solutions(primal); + std::vector last_restart_duality_gap_primal_solution(primal); + + std::vector current_dual_solution(dual); + std::vector initial_dual_average(dual); + std::vector sum_dual_solutions(dual); + std::vector last_restart_duality_gap_dual_solution(dual); rmm::device_uvector d_primal_mapping = cuopt::device_copy(primal_mapping, handle_.get_stream()); @@ -137,16 +131,16 @@ TEST(SolverSettingsTest, warm_start_smaller_vector) -1); solver_settings.set_pdlp_warm_start_data(warm_start_data, d_primal_mapping, 
d_dual_mapping); - std::vector h_current_primal_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_); - std::vector h_initial_primal_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_); - std::vector h_current_ATY = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_); - std::vector h_sum_primal_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_); - std::vector h_last_restart_duality_gap_primal_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_); + const std::vector& h_current_primal_solution = + solver_settings.get_pdlp_warm_start_data().current_primal_solution_; + const std::vector& h_initial_primal_average = + solver_settings.get_pdlp_warm_start_data().initial_primal_average_; + const std::vector& h_current_ATY = + solver_settings.get_pdlp_warm_start_data().current_ATY_; + const std::vector& h_sum_primal_solutions = + solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_; + const std::vector& h_last_restart_duality_gap_primal_solution = + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_; EXPECT_EQ(h_current_primal_solution.size(), primal_expected.size()); EXPECT_EQ(h_initial_primal_average.size(), primal_expected.size()); @@ -160,14 +154,14 @@ TEST(SolverSettingsTest, warm_start_smaller_vector) EXPECT_EQ(h_sum_primal_solutions, primal_expected); EXPECT_EQ(h_last_restart_duality_gap_primal_solution, primal_expected); - std::vector h_current_dual_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_); - std::vector h_initial_dual_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_); - std::vector h_sum_dual_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_); - std::vector h_last_restart_duality_gap_dual_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_); + const std::vector& h_current_dual_solution = + solver_settings.get_pdlp_warm_start_data().current_dual_solution_; + const std::vector& h_initial_dual_average = + solver_settings.get_pdlp_warm_start_data().initial_dual_average_; + const std::vector& h_sum_dual_solutions = + solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_; + const std::vector& h_last_restart_duality_gap_dual_solution = + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_; EXPECT_EQ(h_current_dual_solution.size(), dual_expected.size()); EXPECT_EQ(h_initial_dual_average.size(), dual_expected.size()); @@ -196,22 +190,16 @@ TEST(SolverSettingsTest, warm_start_bigger_vector) std::vector primal_expected = {0.0, 1.0, 2.0, 3.0, 0.0, 0.0}; std::vector dual_expected = {0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0}; - rmm::device_uvector current_primal_solution = - cuopt::device_copy(primal, handle_.get_stream()); - rmm::device_uvector initial_primal_average = - cuopt::device_copy(primal, handle_.get_stream()); - rmm::device_uvector current_ATY = cuopt::device_copy(primal, handle_.get_stream()); - rmm::device_uvector sum_primal_solutions = - cuopt::device_copy(primal, handle_.get_stream()); - rmm::device_uvector last_restart_duality_gap_primal_solution = - cuopt::device_copy(primal, handle_.get_stream()); - - rmm::device_uvector current_dual_solution = - cuopt::device_copy(dual, 
handle_.get_stream()); - rmm::device_uvector initial_dual_average = cuopt::device_copy(dual, handle_.get_stream()); - rmm::device_uvector sum_dual_solutions = cuopt::device_copy(dual, handle_.get_stream()); - rmm::device_uvector last_restart_duality_gap_dual_solution = - cuopt::device_copy(dual, handle_.get_stream()); + std::vector current_primal_solution(primal); + std::vector initial_primal_average(primal); + std::vector current_ATY(primal); + std::vector sum_primal_solutions(primal); + std::vector last_restart_duality_gap_primal_solution(primal); + + std::vector current_dual_solution(dual); + std::vector initial_dual_average(dual); + std::vector sum_dual_solutions(dual); + std::vector last_restart_duality_gap_dual_solution(dual); rmm::device_uvector d_primal_mapping = cuopt::device_copy(primal_mapping, handle_.get_stream()); @@ -237,16 +225,16 @@ TEST(SolverSettingsTest, warm_start_bigger_vector) -1); solver_settings.set_pdlp_warm_start_data(warm_start_data, d_primal_mapping, d_dual_mapping); - std::vector h_current_primal_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_); - std::vector h_initial_primal_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_); - std::vector h_current_ATY = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_); - std::vector h_sum_primal_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_); - std::vector h_last_restart_duality_gap_primal_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_); + const std::vector& h_current_primal_solution = + solver_settings.get_pdlp_warm_start_data().current_primal_solution_; + const std::vector& h_initial_primal_average = + solver_settings.get_pdlp_warm_start_data().initial_primal_average_; + const std::vector& h_current_ATY = + solver_settings.get_pdlp_warm_start_data().current_ATY_; + const std::vector& h_sum_primal_solutions = + solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_; + const std::vector& h_last_restart_duality_gap_primal_solution = + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_; EXPECT_EQ(h_current_primal_solution.size(), primal_expected.size()); EXPECT_EQ(h_initial_primal_average.size(), primal_expected.size()); @@ -260,14 +248,14 @@ TEST(SolverSettingsTest, warm_start_bigger_vector) EXPECT_EQ(h_sum_primal_solutions, primal_expected); EXPECT_EQ(h_last_restart_duality_gap_primal_solution, primal_expected); - std::vector h_current_dual_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_); - std::vector h_initial_dual_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_); - std::vector h_sum_dual_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_); - std::vector h_last_restart_duality_gap_dual_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_); + const std::vector& h_current_dual_solution = + solver_settings.get_pdlp_warm_start_data().current_dual_solution_; + const std::vector& h_initial_dual_average = + solver_settings.get_pdlp_warm_start_data().initial_dual_average_; + const std::vector& h_sum_dual_solutions = + solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_; + const std::vector& h_last_restart_duality_gap_dual_solution = + 
solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_; EXPECT_EQ(h_current_dual_solution.size(), dual_expected.size()); EXPECT_EQ(h_initial_dual_average.size(), dual_expected.size()); diff --git a/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh b/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh index e0144b75b..9267bc681 100644 --- a/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh +++ b/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh @@ -42,13 +42,33 @@ static std::string make_path_absolute(const std::string& file) } // Compute on the CPU x * c to check that the returned objective value is correct +// Overload for host-based solutions +static void test_objective_sanity( + const cuopt::mps_parser::mps_data_model_t& op_problem, + const std::vector& primal_solution, + double objective_value, + double epsilon = tolerance) +{ + const auto& c_vector = op_problem.get_objective_coefficients(); + std::vector out(primal_solution.size()); + std::transform(primal_solution.cbegin(), + primal_solution.cend(), + c_vector.cbegin(), + out.begin(), + std::multiplies()); + + const auto sum_primal_objective = std::accumulate(out.cbegin(), out.cend(), 0.0); + EXPECT_NEAR(sum_primal_objective, objective_value, epsilon); +} + +// Overload for device-based solutions static void test_objective_sanity( const cuopt::mps_parser::mps_data_model_t& op_problem, const rmm::device_uvector& primal_solution, double objective_value, double epsilon = tolerance) { - const auto primal_vars = host_copy(primal_solution); + const auto primal_vars = host_copy(primal_solution, rmm::cuda_stream_view{}); const auto& c_vector = op_problem.get_objective_coefficients(); std::vector out(primal_vars.size()); std::transform(primal_vars.cbegin(), @@ -72,7 +92,8 @@ static void test_constraint_sanity( double epsilon = tolerance, bool presolve_enabled = false) { - const std::vector primal_vars = host_copy(solution.get_primal_solution()); + // Solution is already on host + const std::vector& primal_vars = solution.get_primal_solution(); const std::vector& values = op_problem.get_constraint_matrix_values(); const std::vector& indices = op_problem.get_constraint_matrix_indices(); const std::vector& offsets = op_problem.get_constraint_matrix_offsets(); diff --git a/cpp/tests/mip/bounds_standardization_test.cu b/cpp/tests/mip/bounds_standardization_test.cu index 14aa271cd..d15a5b5e2 100644 --- a/cpp/tests/mip/bounds_standardization_test.cu +++ b/cpp/tests/mip/bounds_standardization_test.cu @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -65,13 +66,14 @@ void test_bounds_standardization_test(std::string test_instance) cuopt::mps_parser::mps_data_model_t problem = cuopt::mps_parser::parse_mps(path, false); handle_.sync_stream(); - auto op_problem = mps_data_model_to_optimization_problem(&handle_, problem); - problem_checking_t::check_problem_representation(op_problem); + auto op_problem = mps_data_model_to_optimization_problem(problem); + auto gpu_problem = host_to_gpu_problem(&handle_, op_problem); + problem_checking_t::check_problem_representation(gpu_problem); setup_pdlp(handle_.get_stream()); - init_handler(op_problem.get_handle_ptr()); + init_handler(&handle_); // run the problem constructor of MIP, so that we do bounds standardization - detail::problem_t standardized_problem(op_problem); - detail::problem_t original_problem(op_problem); + detail::problem_t standardized_problem(gpu_problem); + detail::problem_t 
original_problem(gpu_problem); standardized_problem.preprocess_problem(); detail::trivial_presolve(standardized_problem); detail::solution_t solution_1(standardized_problem); diff --git a/cpp/tests/mip/doc_example_test.cu b/cpp/tests/mip/doc_example_test.cu index e154e8c69..a3bf0bd85 100644 --- a/cpp/tests/mip/doc_example_test.cu +++ b/cpp/tests/mip/doc_example_test.cu @@ -98,25 +98,25 @@ void test_mps_file() auto problem = create_doc_example_problem(); settings.time_limit = test_time_limit; - mip_solution_t solution = solve_mip(&handle_, problem, settings); + mip_solution_t solution = solve_mip(problem, settings); EXPECT_EQ(solution.get_termination_status(), mip_termination_status_t::Optimal); double obj_val = solution.get_objective_value(); // Expected objective value from documentation example is approximately 303.5 EXPECT_NEAR(303.5, obj_val, 1.0); - // Test solution bounds + // Test solution bounds (solution is already on host) test_variable_bounds(problem, solution.get_solution(), settings); - // Get solution values + // Get solution values (already on host) const auto& sol_values = solution.get_solution(); // x should be approximately 37 and integer - EXPECT_NEAR(37.0, sol_values.element(0, handle_.get_stream()), 0.1); - EXPECT_NEAR(std::round(sol_values.element(0, handle_.get_stream())), - sol_values.element(0, handle_.get_stream()), + EXPECT_NEAR(37.0, sol_values[0], 0.1); + EXPECT_NEAR(std::round(sol_values[0]), + sol_values[0], settings.tolerances.integrality_tolerance); // Check x is integer // y should be approximately 39.5 - EXPECT_NEAR(39.5, sol_values.element(1, handle_.get_stream()), 0.1); + EXPECT_NEAR(39.5, sol_values[1], 0.1); } TEST(docs, mixed_integer_linear_programming) { test_mps_file(); } @@ -136,7 +136,7 @@ TEST(docs, user_problem_file) settings.time_limit = test_time_limit; settings.user_problem_file = user_problem_path; settings.presolve = false; - EXPECT_EQ(solve_mip(&handle_, problem, settings).get_termination_status(), + EXPECT_EQ(solve_mip(problem, settings).get_termination_status(), mip_termination_status_t::Optimal); EXPECT_TRUE(std::filesystem::exists(user_problem_path)); @@ -150,24 +150,24 @@ TEST(docs, user_problem_file) const auto user_problem_path2 = std::filesystem::temp_directory_path() / "user_problem2.mps"; settings.user_problem_file = user_problem_path2; - mip_solution_t solution = solve_mip(&handle_, problem2, settings); + mip_solution_t solution = solve_mip(problem2, settings); EXPECT_EQ(solution.get_termination_status(), mip_termination_status_t::Optimal); double obj_val = solution.get_objective_value(); // Expected objective value from documentation example is approximately 303.5 EXPECT_NEAR(303.5, obj_val, 1.0); - // Get solution values + // Get solution values (already on host) const auto& sol_values = solution.get_solution(); // x should be approximately 37 and integer for (int i = 0; i < problem2.get_n_variables(); i++) { if (problem2.get_variable_names()[i] == "x") { - EXPECT_NEAR(37.0, sol_values.element(i, handle_.get_stream()), 0.1); - EXPECT_NEAR(std::round(sol_values.element(i, handle_.get_stream())), - sol_values.element(i, handle_.get_stream()), + EXPECT_NEAR(37.0, sol_values[i], 0.1); + EXPECT_NEAR(std::round(sol_values[i]), + sol_values[i], settings.tolerances.integrality_tolerance); // Check x is integer } else { // y should be approximately 39.5 - EXPECT_NEAR(39.5, sol_values.element(i, handle_.get_stream()), 0.1); + EXPECT_NEAR(39.5, sol_values[i], 0.1); } } } diff --git a/cpp/tests/mip/elim_var_remap_test.cu 
b/cpp/tests/mip/elim_var_remap_test.cu index e6aa6ec17..ff268be99 100644 --- a/cpp/tests/mip/elim_var_remap_test.cu +++ b/cpp/tests/mip/elim_var_remap_test.cu @@ -18,6 +18,7 @@ #include "../linear_programming/utilities/pdlp_test_utilities.cuh" #include "mip_utils.cuh" +#include #include #include #include @@ -80,12 +81,13 @@ void test_elim_var_remap(std::string test_instance) cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); handle_.sync_stream(); - auto op_problem = mps_data_model_to_optimization_problem(&handle_, mps_problem); - problem_checking_t::check_problem_representation(op_problem); + auto op_problem = mps_data_model_to_optimization_problem(mps_problem); + auto gpu_problem = host_to_gpu_problem(&handle_, op_problem); + problem_checking_t::check_problem_representation(gpu_problem); - init_handler(op_problem.get_handle_ptr()); + init_handler(&handle_); // run the problem constructor of MIP, so that we do bounds standardization - detail::problem_t problem(op_problem); + detail::problem_t problem(gpu_problem); problem.preprocess_problem(); trivial_presolve(problem); @@ -148,13 +150,14 @@ void test_elim_var_solution(std::string test_instance) cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); handle_.sync_stream(); - auto op_problem = mps_data_model_to_optimization_problem(&handle_, mps_problem); - problem_checking_t::check_problem_representation(op_problem); + auto op_problem = mps_data_model_to_optimization_problem(mps_problem); + auto gpu_problem = host_to_gpu_problem(&handle_, op_problem); + problem_checking_t::check_problem_representation(gpu_problem); setup_pdlp(handle_.get_stream()); - init_handler(op_problem.get_handle_ptr()); + init_handler(&handle_); // run the problem constructor of MIP, so that we do bounds standardization - detail::problem_t standardized_problem(op_problem); - detail::problem_t original_problem(op_problem); + detail::problem_t standardized_problem(gpu_problem); + detail::problem_t original_problem(gpu_problem); standardized_problem.preprocess_problem(); trivial_presolve(standardized_problem); detail::problem_t sub_problem(standardized_problem); @@ -182,7 +185,8 @@ void test_elim_var_solution(std::string test_instance) auto fixed_vars = select_k_random(standardized_problem.n_variables - 1, 5); for (auto& v : fixed_vars) { - double v_val = opt_sol_1.get_solution().element(v, handle_.get_stream()); + // Solution is already on host + double v_val = opt_sol_1.get_solution()[v]; double2 val = double2{v_val, v_val}; sub_problem.variable_bounds.set_element(v, val, handle_.get_stream()); } diff --git a/cpp/tests/mip/incumbent_callback_test.cu b/cpp/tests/mip/incumbent_callback_test.cu index f45efca72..637c02153 100644 --- a/cpp/tests/mip/incumbent_callback_test.cu +++ b/cpp/tests/mip/incumbent_callback_test.cu @@ -116,7 +116,7 @@ void test_incumbent_callback(std::string test_instance) cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); handle_.sync_stream(); - auto op_problem = mps_data_model_to_optimization_problem(&handle_, mps_problem); + auto op_problem = mps_data_model_to_optimization_problem(mps_problem); auto settings = mip_solver_settings_t{}; settings.time_limit = 30.; diff --git a/cpp/tests/mip/mip_utils.cuh b/cpp/tests/mip/mip_utils.cuh index b53952e12..33e8a10a8 100644 --- a/cpp/tests/mip/mip_utils.cuh +++ b/cpp/tests/mip/mip_utils.cuh @@ -24,6 +24,35 @@ namespace cuopt::linear_programming::test { +// Overload for host-based 
solutions +static void test_variable_bounds( + const cuopt::mps_parser::mps_data_model_t& problem, + const std::vector& solution, + const cuopt::linear_programming::mip_solver_settings_t settings) +{ + const double* lower_bound_ptr = problem.get_variable_lower_bounds().data(); + const double* upper_bound_ptr = problem.get_variable_upper_bounds().data(); + const double* assignment_ptr = solution.data(); + cuopt_assert(solution.size() == problem.get_variable_lower_bounds().size(), ""); + cuopt_assert(solution.size() == problem.get_variable_upper_bounds().size(), ""); + std::vector indices(solution.size()); + std::iota(indices.begin(), indices.end(), 0); + bool result = std::all_of(indices.begin(), indices.end(), [=](int idx) { + bool res = true; + if (lower_bound_ptr != nullptr) { + res = res && (assignment_ptr[idx] >= + lower_bound_ptr[idx] - settings.tolerances.integrality_tolerance); + } + if (upper_bound_ptr != nullptr) { + res = res && (assignment_ptr[idx] <= + upper_bound_ptr[idx] + settings.tolerances.integrality_tolerance); + } + return res; + }); + EXPECT_TRUE(result); +} + +// Overload for device-based solutions static void test_variable_bounds( const cuopt::mps_parser::mps_data_model_t& problem, const rmm::device_uvector& solution, @@ -31,7 +60,7 @@ static void test_variable_bounds( { const double* lower_bound_ptr = problem.get_variable_lower_bounds().data(); const double* upper_bound_ptr = problem.get_variable_upper_bounds().data(); - auto host_assignment = cuopt::host_copy(solution); + auto host_assignment = cuopt::host_copy(solution, rmm::cuda_stream_view{}); double* assignment_ptr = host_assignment.data(); cuopt_assert(host_assignment.size() == problem.get_variable_lower_bounds().size(), ""); cuopt_assert(host_assignment.size() == problem.get_variable_upper_bounds().size(), ""); @@ -76,9 +105,38 @@ struct violation { } }; +// Forward declaration +static void test_constraint_sanity_per_row_impl( + const cuopt::mps_parser::mps_data_model_t& op_problem, + const std::vector& solution, + double abs_tolerance, + double rel_tolerance); + +// Overload for host vectors (std::vector) static void test_constraint_sanity_per_row( const cuopt::mps_parser::mps_data_model_t& op_problem, - const rmm::device_uvector& solution, + const std::vector& solution, + double abs_tolerance, + double rel_tolerance) +{ + test_constraint_sanity_per_row_impl(op_problem, solution, abs_tolerance, rel_tolerance); +} + +// Overload for device vectors (device_uvector) - converts to host then calls impl +static void test_constraint_sanity_per_row( + const cuopt::mps_parser::mps_data_model_t& op_problem, + const rmm::device_uvector& device_solution, + double abs_tolerance, + double rel_tolerance) +{ + auto solution = cuopt::host_copy(device_solution, rmm::cuda_stream_view{}); + test_constraint_sanity_per_row_impl(op_problem, solution, abs_tolerance, rel_tolerance); +} + +// Implementation that works with host vectors +static void test_constraint_sanity_per_row_impl( + const cuopt::mps_parser::mps_data_model_t& op_problem, + const std::vector& solution, double abs_tolerance, double rel_tolerance) { @@ -91,7 +149,8 @@ static void test_constraint_sanity_per_row( const std::vector& variable_upper_bounds = op_problem.get_variable_upper_bounds(); std::vector residual(constraint_lower_bounds.size(), 0.0); std::vector viol(constraint_lower_bounds.size(), 0.0); - auto h_solution = cuopt::host_copy(solution); + // Solution is already on host + const std::vector& h_solution = solution; // CSR SpMV for (size_t i = 0; i < 
offsets.size() - 1; ++i) { for (int j = offsets[i]; j < offsets[i + 1]; ++j) { @@ -127,7 +186,7 @@ static std::tuple test_mps_file( settings.time_limit = time_limit; settings.heuristics_only = heuristics_only; settings.presolve = presolve; - mip_solution_t solution = solve_mip(&handle_, problem, settings); + mip_solution_t solution = solve_mip(problem, settings); return std::make_tuple(solution.get_termination_status(), solution.get_objective_value(), solution.get_solution_bound()); diff --git a/cpp/tests/mip/miplib_test.cu b/cpp/tests/mip/miplib_test.cu index d0866455f..1a952ff54 100644 --- a/cpp/tests/mip/miplib_test.cu +++ b/cpp/tests/mip/miplib_test.cu @@ -59,7 +59,7 @@ void test_miplib_file(result_map_t test_instance, mip_solver_settings_t solution = solve_mip(&handle_, problem, settings); + mip_solution_t solution = solve_mip(problem, settings); bool is_feasible = solution.get_termination_status() == mip_termination_status_t::FeasibleFound || solution.get_termination_status() == mip_termination_status_t::Optimal; EXPECT_TRUE(is_feasible); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 10d6bc7bc..b31852f57 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -19,6 +19,7 @@ #include "mip_utils.cuh" #include +#include #include #include #include @@ -154,9 +155,10 @@ void test_multi_probe(std::string path) cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); handle_.sync_stream(); - auto op_problem = mps_data_model_to_optimization_problem(&handle_, mps_problem); - problem_checking_t::check_problem_representation(op_problem); - detail::problem_t problem(op_problem); + auto op_problem = mps_data_model_to_optimization_problem(mps_problem); + auto gpu_problem = host_to_gpu_problem(&handle_, op_problem); + problem_checking_t::check_problem_representation(gpu_problem); + detail::problem_t problem(gpu_problem); mip_solver_settings_t default_settings{}; detail::pdlp_initial_scaling_strategy_t scaling(&handle_, problem, diff --git a/cpp/tests/mip/problem_test.cu b/cpp/tests/mip/problem_test.cu index a7c9a07ea..44b1b78e3 100644 --- a/cpp/tests/mip/problem_test.cu +++ b/cpp/tests/mip/problem_test.cu @@ -18,6 +18,7 @@ #include "../linear_programming/utilities/pdlp_test_utilities.cuh" #include +#include #include #include #include @@ -69,9 +70,9 @@ thrust::host_vector rand_vec(i_t size, T dist_beg, T dist_end) } template -lp::optimization_problem_t create_problem(raft::handle_t const* h, i_t n_cnst, i_t n_var) +lp::optimization_problem_t create_problem(i_t n_cnst, i_t n_var) { - lp::optimization_problem_t problem(h); + lp::optimization_problem_t problem; thrust::default_random_engine rng(1337); thrust::uniform_real_distribution dist(0, 5); @@ -154,26 +155,18 @@ void set_equal_var_bounds(optimization_problem_t& problem, thrust::host_vector& selected_vars) { cuopt_assert(selected_vars.size() < problem.get_n_variables(), "invalid number of variables"); - rmm::device_uvector& v_lb = problem.get_variable_lower_bounds(); - rmm::device_uvector& v_ub = problem.get_variable_upper_bounds(); - rmm::device_uvector sel_vars(selected_vars.size(), problem.get_handle_ptr()->get_stream()); - raft::copy(sel_vars.data(), - selected_vars.data(), - selected_vars.size(), - problem.get_handle_ptr()->get_stream()); - auto lb = make_span(v_lb); - auto ub = make_span(v_ub); - auto vt = make_span(problem.get_variable_types()); - thrust::for_each(problem.get_handle_ptr()->get_thrust_policy(), - sel_vars.begin(), 
- sel_vars.end(), - [lb, ub, vt] __device__(auto v) { - if (vt[v] == var_t::INTEGER) { - lb[v] = ub[v] = ceil(ub[v]); - } else { - lb[v] = ub[v]; - } - }); + // Problem now uses host memory (std::vector) - work directly with host data + auto& v_lb = problem.get_variable_lower_bounds(); + auto& v_ub = problem.get_variable_upper_bounds(); + const auto& vt = problem.get_variable_types(); + + for (const auto& v : selected_vars) { + if (vt[v] == var_t::INTEGER) { + v_lb[v] = v_ub[v] = std::ceil(v_ub[v]); + } else { + v_lb[v] = v_ub[v]; + } + } } template @@ -192,12 +185,14 @@ void test_equal_val_bounds(i_t n_cnst, i_t n_var) { const raft::handle_t handle_{}; - auto op_problem = create_problem(&handle_, n_cnst, n_var); + auto op_problem = create_problem(n_cnst, n_var); auto selected_vars = generate_random_vals(op_problem.get_n_variables(), std::max(n_var * 0.1, 1.)); set_equal_var_bounds(op_problem, selected_vars); - dtl::problem_t problem(op_problem); + // Convert host problem to GPU problem for solver + auto gpu_problem = host_to_gpu_problem(&handle_, op_problem); + dtl::problem_t problem(gpu_problem); problem.preprocess_problem(); @@ -240,12 +235,16 @@ TEST(problem, setting_both_rhs_and_constraints_bounds) // Check constraints lower/upper bounds after having filled the row type and rhs { raft::handle_t handle; - optimization_problem_t op_problem(&handle); + optimization_problem_t op_problem; fill_problem(op_problem); - cuopt::linear_programming::detail::problem_t problem(op_problem); + auto gpu_problem = host_to_gpu_problem(&handle, op_problem); + cuopt::linear_programming::detail::problem_t problem(gpu_problem); - const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds); - const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds); + // problem members are device vectors, need stream for copy + const auto constraints_lower_bounds = + host_copy(problem.constraint_lower_bounds, problem.handle_ptr->get_stream()); + const auto constraints_upper_bounds = + host_copy(problem.constraint_upper_bounds, problem.handle_ptr->get_stream()); EXPECT_EQ(constraints_lower_bounds[0], 1.0); EXPECT_EQ(constraints_upper_bounds[0], 1.0); @@ -254,16 +253,20 @@ TEST(problem, setting_both_rhs_and_constraints_bounds) // Check constraints lower/upper bounds after having set both { raft::handle_t handle; - optimization_problem_t op_problem(&handle); + optimization_problem_t op_problem; fill_problem(op_problem); double lower[] = {2.0}; double upper[] = {3.0}; op_problem.set_constraint_lower_bounds(lower, 1); op_problem.set_constraint_upper_bounds(upper, 1); - cuopt::linear_programming::detail::problem_t problem(op_problem); - - const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds); - const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds); + auto gpu_problem = host_to_gpu_problem(&handle, op_problem); + cuopt::linear_programming::detail::problem_t problem(gpu_problem); + + // problem members are device vectors, need stream for copy + const auto constraints_lower_bounds = + host_copy(problem.constraint_lower_bounds, problem.handle_ptr->get_stream()); + const auto constraints_upper_bounds = + host_copy(problem.constraint_upper_bounds, problem.handle_ptr->get_stream()); EXPECT_EQ(constraints_lower_bounds[0], 2.0); EXPECT_EQ(constraints_upper_bounds[0], 3.0); } @@ -272,16 +275,20 @@ TEST(problem, setting_both_rhs_and_constraints_bounds) // Set upper / lower before { raft::handle_t handle; - optimization_problem_t 
op_problem(&handle); + optimization_problem_t op_problem; double lower[] = {2.0}; double upper[] = {3.0}; op_problem.set_constraint_lower_bounds(lower, 1); op_problem.set_constraint_upper_bounds(upper, 1); fill_problem(op_problem); - cuopt::linear_programming::detail::problem_t problem(op_problem); - - const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds); - const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds); + auto gpu_problem = host_to_gpu_problem(&handle, op_problem); + cuopt::linear_programming::detail::problem_t problem(gpu_problem); + + // problem members are device vectors, need stream for copy + const auto constraints_lower_bounds = + host_copy(problem.constraint_lower_bounds, problem.handle_ptr->get_stream()); + const auto constraints_upper_bounds = + host_copy(problem.constraint_upper_bounds, problem.handle_ptr->get_stream()); EXPECT_EQ(constraints_lower_bounds[0], 2.0); EXPECT_EQ(constraints_upper_bounds[0], 3.0); } @@ -294,11 +301,16 @@ TEST(optimization_problem_t_DeathTest, test_check_problem_validity) GTEST_FLAG_SET(death_test_style, "threadsafe"); raft::handle_t handle; - auto op_problem = optimization_problem_t(&handle); + auto op_problem = optimization_problem_t(); using custom_problem_t = cuopt::linear_programming::detail::problem_t; // Check if assert if nothing - EXPECT_DEATH({ custom_problem_t problem(op_problem); }, ""); + EXPECT_DEATH( + { + auto gpu_problem = host_to_gpu_problem(&handle, op_problem); + custom_problem_t problem(gpu_problem); + }, + ""); // Set A_CSR_matrix /* @@ -311,35 +323,56 @@ TEST(optimization_problem_t_DeathTest, test_check_problem_validity) op_problem.set_csr_constraint_matrix(A_host, 5, indices_host, 5, offset_host, 3); // Test if assert is thrown when c is not set - EXPECT_DEATH({ custom_problem_t problem(op_problem); }, ""); + EXPECT_DEATH( + { + auto gpu_problem = host_to_gpu_problem(&handle, op_problem); + custom_problem_t problem(gpu_problem); + }, + ""); // Set c double c_host[] = {1.0, 2.0, 3.0}; op_problem.set_objective_coefficients(c_host, 3); // Test if assert is thrown when constraints are not set - EXPECT_DEATH({ custom_problem_t problem(op_problem); }, ""); + EXPECT_DEATH( + { + auto gpu_problem = host_to_gpu_problem(&handle, op_problem); + custom_problem_t problem(gpu_problem); + }, + ""); // Set row type char row_type_host[] = {'E', 'E'}; op_problem.set_row_types(row_type_host, 2); // Test if assert is thrown when row_type is set but not b - EXPECT_DEATH({ custom_problem_t problem(op_problem); }, ""); + EXPECT_DEATH( + { + auto gpu_problem = host_to_gpu_problem(&handle, op_problem); + custom_problem_t problem(gpu_problem); + }, + ""); // Set b double b_host[] = {1.0, 2.0}; op_problem.set_constraint_bounds(b_host, 2); // Test that nothing is thrown when both b and row types are set - custom_problem_t problem(op_problem); + auto gpu_problem1 = host_to_gpu_problem(&handle, op_problem); + custom_problem_t problem(gpu_problem1); // Unsetting row types and constraints bounds op_problem.set_row_types(row_type_host, 0); op_problem.set_constraint_bounds(b_host, 0); // Test again if assert is thrown when constraints bounds are not set - EXPECT_DEATH({ custom_problem_t problem(op_problem); }, ""); + EXPECT_DEATH( + { + auto gpu_problem = host_to_gpu_problem(&handle, op_problem); + custom_problem_t problem(gpu_problem); + }, + ""); // Seting constraint lower bounds double constraint_lower_bounds_host[] = {1.0f, 2.0f}; @@ -348,7 +381,8 @@ TEST(optimization_problem_t_DeathTest, 
test_check_problem_validity) op_problem.set_constraint_upper_bounds(constraint_lower_bounds_host, 2); // Test if no assert is thrown when constraints bounds are set - custom_problem_t problem2(op_problem); + auto gpu_problem2 = host_to_gpu_problem(&handle, op_problem); + custom_problem_t problem2(gpu_problem2); // Manually unsetting the tranpose fields in problem2 (automatically created in LP mode) problem2.reverse_coefficients = rmm::device_uvector(0, handle.get_stream()); diff --git a/cpp/tests/mip/unit_test.cu b/cpp/tests/mip/unit_test.cu index 9897b2feb..0285cd554 100644 --- a/cpp/tests/mip/unit_test.cu +++ b/cpp/tests/mip/unit_test.cu @@ -182,15 +182,15 @@ TEST(LPTest, TestSampleLP2) settings.time_limit = 5; // Solve - auto result = cuopt::linear_programming::solve_lp(&handle, problem, settings); + auto result = cuopt::linear_programming::solve_lp(problem, settings); // Check results EXPECT_EQ(result.get_termination_status(), cuopt::linear_programming::pdlp_termination_status_t::Optimal); ASSERT_EQ(result.get_primal_solution().size(), 1); - // Copy solution to host to access values - auto primal_host = cuopt::host_copy(result.get_primal_solution()); + // Solution is already on host + const auto& primal_host = result.get_primal_solution(); EXPECT_NEAR(primal_host[0], 0.0, 1e-6); EXPECT_NEAR(result.get_additional_termination_information().primal_objective, 0.0, 1e-6); @@ -207,7 +207,7 @@ TEST(LPTest, TestSampleLP) settings.time_limit = 5; settings.presolve = false; - auto result = cuopt::linear_programming::solve_lp(&handle, problem, settings); + auto result = cuopt::linear_programming::solve_lp(problem, settings); EXPECT_EQ(result.get_termination_status(), cuopt::linear_programming::pdlp_termination_status_t::Optimal); @@ -228,7 +228,7 @@ TEST(ErrorTest, TestError) problem.set_constraint_lower_bounds(lower_bounds.data(), lower_bounds.size()); problem.set_constraint_upper_bounds(upper_bounds.data(), upper_bounds.size()); - auto result = cuopt::linear_programming::solve_mip(&handle, problem, settings); + auto result = cuopt::linear_programming::solve_mip(problem, settings); EXPECT_EQ(result.get_termination_status(), cuopt::linear_programming::mip_termination_status_t::NoTermination); @@ -254,7 +254,7 @@ TEST_P(MILPTestParams, TestSampleMILP) settings.heuristics_only = heuristics_only; settings.presolve = false; - auto result = cuopt::linear_programming::solve_mip(&handle, problem, settings); + auto result = cuopt::linear_programming::solve_mip(problem, settings); EXPECT_EQ(result.get_termination_status(), expected_termination_status); } @@ -275,7 +275,7 @@ TEST_P(MILPTestParams, TestSingleVarMILP) settings.heuristics_only = heuristics_only; settings.presolve = false; - auto result = cuopt::linear_programming::solve_mip(&handle, problem, settings); + auto result = cuopt::linear_programming::solve_mip(problem, settings); EXPECT_EQ(result.get_termination_status(), cuopt::linear_programming::mip_termination_status_t::Optimal); diff --git a/docs/cuopt/source/introduction.rst b/docs/cuopt/source/introduction.rst index de24f4746..c74a34572 100644 --- a/docs/cuopt/source/introduction.rst +++ b/docs/cuopt/source/introduction.rst @@ -2,7 +2,7 @@ Introduction ========================== -**NVIDIA® cuOpt™** is a GPU-accelerated optimization library that solves `Mixed Integer Linear Programming (MILP) `_, `Linear Programming (LP) `_, and `Vehicle Routing Problems (VRP) `_. 
It enables solutions for large-scale problems with millions of variables and constraints, offering seamless deployment across hybrid and multi-cloud environments. +**NVIDIA® cuOpt™** is a GPU-accelerated optimization engine that solves `Mixed Integer Linear Programming (MILP) `_, `Linear Programming (LP) `_, and `Vehicle Routing Problems (VRP) `_. It enables solutions for large-scale problems with millions of variables and constraints, offering seamless deployment across hybrid and multi-cloud environments. Using accelerated computing, NVIDIA® cuOpt optimizes operations research and logistics by enabling better, faster decisions. diff --git a/docs/cuopt/source/system-requirements.rst b/docs/cuopt/source/system-requirements.rst index 8a6eb7d83..f88de044c 100644 --- a/docs/cuopt/source/system-requirements.rst +++ b/docs/cuopt/source/system-requirements.rst @@ -56,7 +56,7 @@ Dependencies are installed automatically when using the pip and Conda installati - ARM64 * GPU: - - NVIDIA H100 SXM (compute capability >= 9.0) + - NVIDIA H100 SXM (compute capability >= 9.0) and above * CPU: - 32+ cores diff --git a/python/cuopt/CMakeLists.txt b/python/cuopt/CMakeLists.txt index 57a03830a..74d046331 100644 --- a/python/cuopt/CMakeLists.txt +++ b/python/cuopt/CMakeLists.txt @@ -28,6 +28,8 @@ project( # that is fixed we need to keep C. C CXX CUDA) +# Protobuf is required because cuopt library depends on it +find_package(protobuf REQUIRED) find_package(cuopt "${RAPIDS_VERSION}") find_package(mps_parser "${RAPIDS_VERSION}") diff --git a/python/cuopt/cuopt/linear_programming/solver/solver.pxd b/python/cuopt/cuopt/linear_programming/solver/solver.pxd index d6adf14f0..e4a65c7d8 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver.pxd +++ b/python/cuopt/cuopt/linear_programming/solver/solver.pxd @@ -131,20 +131,21 @@ cdef extern from "cuopt/linear_programming/pdlp/solver_solution.hpp" namespace " cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace "cuopt::cython": # noqa + # NOTE: Uses host memory (vector[double]) for remote solving cdef cppclass linear_programming_ret_t: - unique_ptr[device_buffer] primal_solution_ - unique_ptr[device_buffer] dual_solution_ - unique_ptr[device_buffer] reduced_cost_ + vector[double] primal_solution_ + vector[double] dual_solution_ + vector[double] reduced_cost_ # PDLP warm start data - unique_ptr[device_buffer] current_primal_solution_ - unique_ptr[device_buffer] current_dual_solution_ - unique_ptr[device_buffer] initial_primal_average_ - unique_ptr[device_buffer] initial_dual_average_ - unique_ptr[device_buffer] current_ATY_ - unique_ptr[device_buffer] sum_primal_solutions_ - unique_ptr[device_buffer] sum_dual_solutions_ - unique_ptr[device_buffer] last_restart_duality_gap_primal_solution_ - unique_ptr[device_buffer] last_restart_duality_gap_dual_solution_ + vector[double] current_primal_solution_ + vector[double] current_dual_solution_ + vector[double] initial_primal_average_ + vector[double] initial_dual_average_ + vector[double] current_ATY_ + vector[double] sum_primal_solutions_ + vector[double] sum_dual_solutions_ + vector[double] last_restart_duality_gap_primal_solution_ + vector[double] last_restart_duality_gap_dual_solution_ double initial_primal_weight_ double initial_step_size_ int total_pdlp_iterations_ @@ -167,7 +168,7 @@ cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace bool solved_by_pdlp_ cdef cppclass mip_ret_t: - unique_ptr[device_buffer] solution_ + vector[double] solution_ 
mip_termination_status_t termination_status_ error_type_t error_status_ string error_message_ diff --git a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx index a468d57ae..84845680b 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx @@ -308,12 +308,12 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, from cuopt.linear_programming.solution.solution import Solution - sol_ret = move(sol_ret_ptr.get()[0]) + # Access the solver_ret_t through the unique_ptr without moving it first + cdef solver_ret_t* sol_ret = sol_ret_ptr.get() if sol_ret.problem_type == ProblemCategory.MIP or sol_ret.problem_type == ProblemCategory.IP: # noqa - solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.mip_ret.solution_) - ) + # Convert host vector directly to numpy (no GPU dependency!) + solution = np.array(sol_ret.mip_ret.solution_, dtype=np.float64) termination_status = sol_ret.mip_ret.termination_status_ error_status = sol_ret.mip_ret.error_status_ error_message = sol_ret.mip_ret.error_message_ @@ -328,13 +328,6 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, num_nodes = sol_ret.mip_ret.nodes_ num_simplex_iterations = sol_ret.mip_ret.simplex_iterations_ - solution = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(solution), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - return Solution( ProblemCategory(sol_ret.problem_type), dict(zip(data_model_obj.get_variable_names(), solution)), @@ -355,30 +348,16 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, ) else: - primal_solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.primal_solution_) + # Convert host vectors directly to numpy (no GPU dependency!) + primal_solution = np.array( + sol_ret.lp_ret.primal_solution_, dtype=np.float64 + ) + dual_solution = np.array( + sol_ret.lp_ret.dual_solution_, dtype=np.float64 + ) + reduced_cost = np.array( + sol_ret.lp_ret.reduced_cost_, dtype=np.float64 ) - dual_solution = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.dual_solution_)) # noqa - reduced_cost = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.reduced_cost_)) # noqa - - primal_solution = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(primal_solution), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - dual_solution = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(dual_solution), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - reduced_cost = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(reduced_cost), - dtype=np.dtype(np.float64) - ) - ).to_numpy() termination_status = sol_ret.lp_ret.termination_status_ error_status = sol_ret.lp_ret.error_status_ @@ -394,97 +373,58 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, # In BatchSolve, we don't get the warm start data if not is_batch: - current_primal_solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.current_primal_solution_) + print( + f"[create_solution] is_batch={is_batch}, " + "processing warm start data" ) - current_dual_solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.current_dual_solution_) + # Convert host vectors directly to numpy (no GPU dependency!) 
+ current_primal_solution = np.array( + sol_ret.lp_ret.current_primal_solution_, dtype=np.float64 ) - initial_primal_average = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.initial_primal_average_) + current_dual_solution = np.array( + sol_ret.lp_ret.current_dual_solution_, dtype=np.float64 ) - initial_dual_average = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.initial_dual_average_) + initial_primal_average = np.array( + sol_ret.lp_ret.initial_primal_average_, dtype=np.float64 ) - current_ATY = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.current_ATY_) + initial_dual_average = np.array( + sol_ret.lp_ret.initial_dual_average_, dtype=np.float64 ) - sum_primal_solutions = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.sum_primal_solutions_) + current_ATY = np.array( + sol_ret.lp_ret.current_ATY_, dtype=np.float64 ) - sum_dual_solutions = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.sum_dual_solutions_) + sum_primal_solutions = np.array( + sol_ret.lp_ret.sum_primal_solutions_, dtype=np.float64 ) - last_restart_duality_gap_primal_solution = DeviceBuffer.c_from_unique_ptr( # noqa - move(sol_ret.lp_ret.last_restart_duality_gap_primal_solution_) + sum_dual_solutions = np.array( + sol_ret.lp_ret.sum_dual_solutions_, dtype=np.float64 ) - last_restart_duality_gap_dual_solution = DeviceBuffer.c_from_unique_ptr( # noqa - move(sol_ret.lp_ret.last_restart_duality_gap_dual_solution_) + last_restart_duality_gap_primal_solution = np.array( + sol_ret.lp_ret.last_restart_duality_gap_primal_solution_, + dtype=np.float64 ) + last_restart_duality_gap_dual_solution = np.array( + sol_ret.lp_ret.last_restart_duality_gap_dual_solution_, + dtype=np.float64 + ) + initial_primal_weight = sol_ret.lp_ret.initial_primal_weight_ initial_step_size = sol_ret.lp_ret.initial_step_size_ total_pdlp_iterations = sol_ret.lp_ret.total_pdlp_iterations_ total_pdhg_iterations = sol_ret.lp_ret.total_pdhg_iterations_ - last_candidate_kkt_score = sol_ret.lp_ret.last_candidate_kkt_score_ + last_candidate_kkt_score = ( + sol_ret.lp_ret.last_candidate_kkt_score_ + ) last_restart_kkt_score = sol_ret.lp_ret.last_restart_kkt_score_ sum_solution_weight = sol_ret.lp_ret.sum_solution_weight_ - iterations_since_last_restart = sol_ret.lp_ret.iterations_since_last_restart_ # noqa - - current_primal_solution = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(current_primal_solution), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - current_dual_solution = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(current_dual_solution), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - initial_primal_average = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(initial_primal_average), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - initial_dual_average = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(initial_dual_average), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - current_ATY = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(current_ATY), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - sum_primal_solutions = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(sum_primal_solutions), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - sum_dual_solutions = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(sum_dual_solutions), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - last_restart_duality_gap_primal_solution = cudf.Series._from_column( # noqa - 
cudf.core.column.build_column( - as_buffer(last_restart_duality_gap_primal_solution), - dtype=np.dtype(np.float64) - ) - ).to_numpy() - last_restart_duality_gap_dual_solution = cudf.Series._from_column( - cudf.core.column.build_column( - as_buffer(last_restart_duality_gap_dual_solution), - dtype=np.dtype(np.float64) - ) - ).to_numpy() + iterations_since_last_restart = ( + sol_ret.lp_ret.iterations_since_last_restart_ + ) # noqa + print( + "[create_solution] About to return Solution " + "with warm start data" + ) return Solution( ProblemCategory(sol_ret.problem_type), dict(zip(data_model_obj.get_variable_names(), primal_solution)), # noqa diff --git a/test_remote_client.py b/test_remote_client.py new file mode 100644 index 000000000..6e3d03560 --- /dev/null +++ b/test_remote_client.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Simple client test for remote solve - uses environment variables from shell""" + +import os +import sys + +print("=" * 60) +print("Remote Solve Client Test") +print("=" * 60) + +# Check environment variables +remote_host = os.environ.get("CUOPT_REMOTE_HOST") +remote_port = os.environ.get("CUOPT_REMOTE_PORT") + +if not remote_host or not remote_port: + print("❌ Error: CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT must be set") + sys.exit(1) + +print(f"\n✅ CUOPT_REMOTE_HOST={remote_host}") +print(f"✅ CUOPT_REMOTE_PORT={remote_port}") + +print("\nLoading cuopt library...") +from cuopt.linear_programming import Problem + +print("✅ Library loaded") + +# Read and solve problem +mps_file = "datasets/linear_programming/afiro_original.mps" + +if not os.path.exists(mps_file): + print(f"❌ MPS file not found: {mps_file}") + sys.exit(1) + +print(f"\nReading MPS file: {mps_file}") +problem = Problem.readMPS(mps_file) + +print("\n" + "=" * 60) +print("Calling solve() - watch for remote connection...") +print("=" * 60) + +try: + # Note: problem.solve() modifies the problem object in place and returns None + problem.solve() + + print("\n✅ Solve completed!") + print(f"Status: {problem.Status}") + print(f"Solve Time: {problem.SolveTime:.3f}s") + + # Show solution stats + if hasattr(problem, 'SolutionStats'): + stats = problem.SolutionStats + if hasattr(stats, 'primal_objective'): + print(f"Objective: {stats.primal_objective:.6f}") + if hasattr(stats, 'dual_objective'): + print(f"Dual Objective: {stats.dual_objective:.6f}") + if hasattr(stats, 'nb_iterations'): + print(f"Iterations: {stats.nb_iterations}") + + # Show variable count and sample values + if problem.vars and len(problem.vars) > 0: + print(f"\nVariables: {len(problem.vars)}") + non_zero_count = sum(1 for var in problem.vars if var.Value is not None and var.Value != 0) + print(f"Non-zero values: {non_zero_count}") + + # Verify warm start data was received + if hasattr(problem, 'warmstart_data') and problem.warmstart_data: + print(f"\n✅ Warm start 
data received and populated") + else: + print(f"\n⚠️ No warm start data") + +except Exception as e: + print(f"\n❌ Exception during solve: {e}") + import traceback + traceback.print_exc() + +print("\n" + "=" * 60) +print("✅ Client test completed") +print("=" * 60) diff --git a/test_remote_solve_e2e.sh b/test_remote_solve_e2e.sh new file mode 100755 index 000000000..42c565b56 --- /dev/null +++ b/test_remote_solve_e2e.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# End-to-end test of remote solve: Start server, run client, stop server + +set -e + +# Check if in correct conda environment +if [ "$CONDA_DEFAULT_ENV" != "cuopt_dev_2510_12" ]; then + echo "Error: Please run this script from the cuopt_dev_2510_12 conda environment" + echo "Run: conda activate cuopt_dev_2510_12" + exit 1 +fi + +PORT=9999 +SERVER_PID="" +SERVER_LOG="server_e2e.log" + +# Cleanup function +cleanup() { + echo "" + echo "=== Cleanup ===" + if [ -n "$SERVER_PID" ]; then + echo "Stopping server (PID: $SERVER_PID)..." + kill $SERVER_PID 2>/dev/null || true + wait $SERVER_PID 2>/dev/null || true + echo "Server stopped" + fi +} + +# Set trap for cleanup on exit +trap cleanup EXIT INT TERM + +echo "==========================================================" +echo "cuOpt Remote Solve - End-to-End Test" +echo "==========================================================" +echo "" + +# Start server in background +echo "=== Starting Server ===" +echo "Command: cpp/build/cuopt_remote_server $PORT" +cpp/build/cuopt_remote_server $PORT > $SERVER_LOG 2>&1 & +SERVER_PID=$! + +echo "Server started (PID: $SERVER_PID) on port $PORT" +echo "Server log: $SERVER_LOG" +echo "" + +# Wait for server to be ready +echo "Waiting for server to be ready..." +sleep 3 + +# Check if server is still running +if ! kill -0 $SERVER_PID 2>/dev/null; then + echo "❌ Server failed to start!" + echo "Server log contents:" + cat $SERVER_LOG + exit 1 +fi + +echo "✅ Server is running" +echo "" + +# Set environment variables for client +export CUOPT_REMOTE_HOST="127.0.0.1" +export CUOPT_REMOTE_PORT="$PORT" + +echo "=== Running Client ===" +echo "Environment variables:" +echo " CUOPT_REMOTE_HOST=$CUOPT_REMOTE_HOST" +echo " CUOPT_REMOTE_PORT=$CUOPT_REMOTE_PORT" +echo "" + +# Run Python client with timeout +echo "Executing: timeout 60 python test_remote_client.py" +echo "" +echo "========== CLIENT OUTPUT ==========" +timeout 60 python test_remote_client.py 2>&1 | grep -v "CuPy" || echo "Client timed out or failed" +CLIENT_EXIT=$? 
+echo "===================================" +echo "" + +# Give server time to finish processing +sleep 2 + +# Show server log +echo "========== SERVER LOG ==========" +cat $SERVER_LOG | grep -v "^$" +echo "================================" +echo "" + +if [ $CLIENT_EXIT -eq 0 ]; then + echo "==========================================================" + echo "✅ End-to-End Test Completed Successfully!" + echo "==========================================================" + exit 0 +elif [ $CLIENT_EXIT -eq 124 ]; then + echo "==========================================================" + echo "❌ Client timed out after 60 seconds" + echo "==========================================================" + exit 1 +else + echo "==========================================================" + echo "❌ Client failed with exit code: $CLIENT_EXIT" + echo "==========================================================" + exit 1 +fi
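
# Note on the exit-status handling in test_remote_solve_e2e.sh above: the client
# invocation is piped through grep (to filter CuPy warnings) and guarded with
# "|| echo ...", so the CLIENT_EXIT=$? assignment records the status of the last
# command in that chain (grep, or the fallback echo), not of the
# "timeout 60 python test_remote_client.py" step; as written, the later
# "-eq 124" timeout branch cannot fire. A minimal sketch of capturing the
# client's own exit status while still filtering the CuPy lines, assuming the
# same bash environment and file names used above, could look like:
#
#   set +e                                            # don't let set -e abort on a failing client
#   timeout 60 python test_remote_client.py 2>&1 | grep -v "CuPy"
#   CLIENT_EXIT=${PIPESTATUS[0]}                      # status of timeout/python, not grep
#   set -e
#
# PIPESTATUS is bash-specific and must be read immediately after the pipeline;
# with this capture, the existing success / 124-timeout / failure branches at the
# end of the script report the client's real outcome.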