Commit 7a1e337

generatedunixname89002005232357 authored and facebook-github-bot committed
Revert D83100663 (pytorch#4938)
Summary:
Pull Request resolved: pytorch#4938
X-link: facebookresearch/FBGEMM#1960

This diff reverts D83100663. That diff broke unicorn's no_gpu build restriction by adding a //deeplearning/fbgemm/fbgemm_gpu:config_cpp dependency to the CPU-only embedding ops, which transitively pulls in //caffe2/c10:c10_cuda; the revert unblocks unicorn deployments.

Depends on D83100663

Differential Revision: D83360843

fbshipit-source-id: ab201a1fee6690091193385fe6b201c79e8c53c6
1 parent 4f25e72 commit 7a1e337

File tree

4 files changed: +15 −46 lines

fbgemm_gpu/codegen/inference/embedding_forward_quantized_cpu_template.cpp
fbgemm_gpu/include/fbgemm_gpu/config/feature_gates.h
fbgemm_gpu/src/config/feature_gates.cpp
fbgemm_gpu/test/tbe/inference/nbit_forward_test.py


fbgemm_gpu/codegen/inference/embedding_forward_quantized_cpu_template.cpp

Lines changed: 1 addition & 3 deletions

@@ -18,7 +18,6 @@
 #include "fbgemm_gpu/embedding_common.h"
 #include "fbgemm/FbgemmEmbedding.h"
 #include "fbgemm_gpu/utils/tensor_utils.h"
-#include "fbgemm_gpu/config/feature_gates.h"
 
 #if defined(__x86_64__) || defined(__i386__) || (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)))
 #include <immintrin.h>
@@ -191,9 +190,8 @@ Tensor int_nbit_split_embedding{{ "_nobag" if nobag else "" }}_codegen_forward_{
 {% else %}
   TORCH_CHECK(D > 0);
 {% endif %}
-  const static bool disablePinnedMemory = fbgemm_gpu::config::is_feature_enabled_from_env(fbgemm_gpu::config::FeatureGateName::TBE_CPU_OUTPUT_DISABLE_PINNED_MEMORY);
   bool pinned_memory = false;
-  if (!disablePinnedMemory && at::Context::hasCUDA() && at::getNumGPUs() > 0) {
+  if (at::Context::hasCUDA() && at::getNumGPUs() > 0) {
     pinned_memory = true;
   }
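
Note: with the gate removed, the CPU inference output is pinned whenever CUDA is visible, with no environment-variable opt-out. As a rough illustration of what the pinned_memory flag controls, here is a minimal sketch using generic ATen calls; this is not the template's actual allocation code, and the shape and dtype below are placeholders.

// Sketch only (assumed shape/dtype): the pinned_memory flag typically ends up
// in the TensorOptions used to allocate the op's CPU output, so the buffer
// can later be copied to the GPU asynchronously.
#include <ATen/ATen.h>

at::Tensor make_output(int64_t B, int64_t total_D, bool pinned_memory) {
  return at::empty(
      {B, total_D},
      at::TensorOptions().dtype(at::kByte).pinned_memory(pinned_memory));
}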

fbgemm_gpu/include/fbgemm_gpu/config/feature_gates.h

Lines changed: 1 addition & 9 deletions

@@ -62,8 +62,7 @@ namespace fbgemm_gpu::config {
   X(TBE_ROCM_INFERENCE_PACKED_BAGS) \
   X(TBE_ROCM_HIP_BACKWARD_KERNEL) \
   X(BOUNDS_CHECK_INDICES_V2) \
-  X(TBE_REPORT_INPUT_PARAMS) \
-  X(TBE_CPU_OUTPUT_DISABLE_PINNED_MEMORY)
+  X(TBE_REPORT_INPUT_PARAMS)
 // X(EXAMPLE_FEATURE_FLAG)
 
 /// @ingroup fbgemm-gpu-config
@@ -92,13 +91,6 @@ bool check_feature_gate_key(const std::string& key);
 /// is enabled.
 bool is_feature_enabled(const FeatureGateName& feature);
 
-/// @ingroup fbgemm-gpu-config
-///
-/// @brief For the given `FeatureGateName`, check if the corresponding inference
-/// feature is enabled in the env vars only. Only applicable for inference
-/// features suitable for env var rollouts
-bool is_feature_enabled_from_env(const FeatureGateName& feature);
-
 #ifdef FBGEMM_FBCODE
 bool is_feature_enabled(const FbFeatureGateName& feature);
 #endif
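
For reference, gates declared in this X-macro list are consulted through is_feature_enabled. A minimal usage sketch for one of the remaining gates follows; it is illustrative only and not code from this commit.

// Usage sketch (not from this commit): query a remaining feature gate.
#include "fbgemm_gpu/config/feature_gates.h"

void maybe_report_inputs() {
  const bool report_inputs = fbgemm_gpu::config::is_feature_enabled(
      fbgemm_gpu::config::FeatureGateName::TBE_REPORT_INPUT_PARAMS);
  if (report_inputs) {
    // ... report TBE input parameters (details omitted) ...
  }
}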

fbgemm_gpu/src/config/feature_gates.cpp

Lines changed: 13 additions & 24 deletions

@@ -45,50 +45,39 @@ bool ev_check_key(const std::string& key) {
   }
 }
 
-static bool check_feature_gate_key_impl(
-    const std::string& key,
-    bool check_env_vars_only) {
+DLL_PUBLIC bool check_feature_gate_key(const std::string& key) {
   // Cache feature flags to avoid repeated JK and env var checks
   static std::map<std::string, bool> feature_flags_cache;
-  if (const auto search = feature_flags_cache.find(key);
-      search != feature_flags_cache.end()) {
-    return search->second;
-  }
 #ifdef FBGEMM_FBCODE
-  const auto value =
-      check_env_vars_only ? ev_check_key(key) : jk_check_key(key);
-#else
-  const auto value = ev_check_key(key);
+  static const auto no_jk = ev_check_key("NO_JK");
 #endif
 
-  feature_flags_cache.insert({key, value});
-  return value;
-}
+  if (const auto search = feature_flags_cache.find(key);
+      search != feature_flags_cache.end()) {
+    return search->second;
 
-DLL_PUBLIC bool check_feature_gate_key(const std::string& key) {
+  } else {
+    const auto value =
 #ifdef FBGEMM_FBCODE
-  static const auto no_jk = ev_check_key("NO_JK");
+        (no_jk) ? ev_check_key(key) : jk_check_key(key);
 #else
-  static const auto no_jk = false;
+        ev_check_key(key);
 #endif
 
-  return check_feature_gate_key_impl(key, no_jk);
+    feature_flags_cache.insert({key, value});
+    return value;
+  }
 }
 
 DLL_PUBLIC bool is_feature_enabled(const FeatureGateName& feature) {
   return check_feature_gate_key(to_string(feature));
 }
 
-DLL_PUBLIC bool is_feature_enabled_from_env(const FeatureGateName& feature) {
-  return check_feature_gate_key_impl(
-      to_string(feature), /* check_env_vars_only */ true);
-}
-
 #ifdef FBGEMM_FBCODE
 DLL_PUBLIC bool is_feature_enabled(const FbFeatureGateName& feature) {
   return check_feature_gate_key(to_string(feature));
 }
-#endif // FBGEMM_FBCODE
+#endif
 
 } // namespace fbgemm_gpu::config
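
The restored check_feature_gate_key memoizes each flag in a function-local cache and, in fbcode builds, falls back from JustKnobs to environment variables when the NO_JK gate is set. The env-var convention used by ev_check_key is not shown in this diff; based on the FBGEMM_TBE_CPU_OUTPUT_DISABLE_PINNED_MEMORY variable referenced in the test file below, it presumably reads FBGEMM_<KEY>, roughly along these lines (a sketch under that assumption, not the real implementation):

// Hedged sketch of the presumed env-var convention; the actual ev_check_key
// implementation lives outside this diff.
#include <cstdlib>
#include <string>

bool ev_check_key_sketch(const std::string& key) {
  const std::string var = "FBGEMM_" + key;  // e.g. FBGEMM_NO_JK
  const char* value = std::getenv(var.c_str());
  return value != nullptr && std::string(value) == "1";
}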

fbgemm_gpu/test/tbe/inference/nbit_forward_test.py

Lines changed: 0 additions & 10 deletions

@@ -8,7 +8,6 @@
 # pyre-strict
 # pyre-ignore-all-errors[56]
 
-import os
 import random
 import unittest
 from typing import Any, Callable, Optional, Union
@@ -124,12 +123,6 @@
 
 @optests.generate_opcheck_tests(fast=True, additional_decorators=additional_decorators)
 class NBitFowardTest(NBitFowardTestCommon):
-    def _is_cpu_output_on_pinned_memory(self) -> bool:
-        return (
-            os.getenv("FBGEMM_TBE_CPU_OUTPUT_DISABLE_PINNED_MEMORY") != "1"
-            and torch.cuda.is_available()
-        )
-
     def execute_nbit_forward_fused_pooled_emb_quant_(
         self,
         T: int,
@@ -905,9 +898,6 @@ def test_nbit_forward_cpu_seq_int8(
         lengths = torch.cat(lengths_list, 0)
         offsets = torch.ops.fbgemm.asynchronous_complete_cumsum(lengths)
         quant_cc_output = quant_cc(indices.int(), offsets.int())
-        self.assertEqual(
-            quant_cc_output.is_pinned(), self._is_cpu_output_on_pinned_memory()
-        )
         tables_rows = [
             T for T, _, _ in quant_cc.split_embedding_weights_with_scale_bias(0)
         ]
