use call_once to prevent repeated thread count setting

kpedro88 · kpedro88 · commit badc9d290bcf · 2025-10-06T19:24:10.000-05:00
diff --git a/src/libtorch.cc b/src/libtorch.cc
@@ -28,6 +28,7 @@
 
 #include <cstdint>
 #include <exception>
+#include <mutex>
 
 #include "libtorch_utils.h"
 #include "triton/backend/backend_common.h"
@@ -66,6 +67,11 @@
 // PyTorch C++ (LibTorch) Backend that implements the TRITONBACKEND API.
 //
 
+namespace {
+  std::once_flag pytorch_interop_threads_flag;
+  std::once_flag pytorch_intraop_threads_flag;
+}
+
 namespace triton { namespace backend { namespace pytorch {
 
 //
@@ -509,13 +515,17 @@ ModelState::ParseParameters()
       }
     } else {
       if (intra_op_thread_count > 0) {
-        at::set_num_threads(intra_op_thread_count);
-        LOG_MESSAGE(
-            TRITONSERVER_LOG_INFO,
-            (std::string("Intra op thread count is set to ") +
-             std::to_string(intra_op_thread_count) + " for model instance '" +
-             Name() + "'")
-                .c_str());
+        // at::set_num_threads() does not throw if called more than once, but issues warnings.
+        // std::call_once() is useful to limit these.
+        std::call_once(pytorch_intraop_threads_flag, [this, intra_op_thread_count](){
+          at::set_num_threads(intra_op_thread_count);
+          LOG_MESSAGE(
+              TRITONSERVER_LOG_INFO,
+              (std::string("Intra op thread count is set to ") +
+               std::to_string(intra_op_thread_count) + " for model instance '" +
+               this->Name() + "'")
+                  .c_str());
+        });
       }
     }
 
@@ -533,13 +543,28 @@ ModelState::ParseParameters()
       }
     } else {
       if (inter_op_thread_count > 0) {
-        at::set_num_interop_threads(inter_op_thread_count);
-        LOG_MESSAGE(
-            TRITONSERVER_LOG_INFO,
-            (std::string("Inter op thread count is set to ") +
-             std::to_string(inter_op_thread_count) + " for model instance '" +
-             Name() + "'")
-                .c_str());
+        // at::set_num_interop_threads() throws if called more than once.
+        // std::call_once() should prevent this, but try/catch is additionally used for safety.
+        std::call_once(pytorch_interop_threads_flag, [this, inter_op_thread_count](){
+          try {
+            at::set_num_interop_threads(inter_op_thread_count);
+            LOG_MESSAGE(
+                TRITONSERVER_LOG_INFO,
+                (std::string("Inter op thread count is set to ") +
+                 std::to_string(inter_op_thread_count) + " for model instance '" +
+                 Name() + "'")
+                    .c_str());
+          } catch (const c10::Error& e) {
+            int current_inter_op_thread_count = at::get_num_interop_threads();
+            bool current_is_requested = inter_op_thread_count == current_inter_op_thread_count;
+            LOG_MESSAGE(
+                TRITONSERVER_LOG_INFO,
+                (std::string("Inter op thread count is already set to ") +
+                 std::to_string(current_inter_op_thread_count) +
+                 (current_is_requested ? "" : " and cannot be changed. Setting ignored") +
+                 " for model instance '" + this->Name() + "'").c_str());
+          }
+        });
       }
     }
   }