Commit 7a1e337

generatedunixname89002005232357 authored and facebook-github-bot committed
Revert D83100663 (pytorch#4938)
Summary:
Pull Request resolved: pytorch#4938
X-link: facebookresearch/FBGEMM#1960

This diff reverts D83100663. That diff broke unicorn's no_gpu build restriction by adding a //deeplearning/fbgemm/fbgemm_gpu:config_cpp dependency to the CPU-only embedding ops, which transitively pulls in //caffe2/c10:c10_cuda; the revert unblocks unicorn deployments.

Depends on D83100663

Differential Revision: D83360843

fbshipit-source-id: ab201a1fee6690091193385fe6b201c79e8c53c6
1 parent 4f25e72 commit 7a1e337

File tree

4 files changed: +15 −46 lines

fbgemm_gpu/codegen/inference/embedding_forward_quantized_cpu_template.cpp
fbgemm_gpu/include/fbgemm_gpu/config/feature_gates.h
fbgemm_gpu/src/config/feature_gates.cpp
fbgemm_gpu/test/tbe/inference/nbit_forward_test.py


fbgemm_gpu/codegen/inference/embedding_forward_quantized_cpu_template.cpp

Lines changed: 1 addition & 3 deletions

@@ -18,7 +18,6 @@
 #include "fbgemm_gpu/embedding_common.h"
 #include "fbgemm/FbgemmEmbedding.h"
 #include "fbgemm_gpu/utils/tensor_utils.h"
-#include "fbgemm_gpu/config/feature_gates.h"
 
 #if defined(__x86_64__) || defined(__i386__) || (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)))
 #include <immintrin.h>
@@ -191,9 +190,8 @@ Tensor int_nbit_split_embedding{{ "_nobag" if nobag else "" }}_codegen_forward_{
 {% else %}
   TORCH_CHECK(D > 0);
 {% endif %}
-  const static bool disablePinnedMemory = fbgemm_gpu::config::is_feature_enabled_from_env(fbgemm_gpu::config::FeatureGateName::TBE_CPU_OUTPUT_DISABLE_PINNED_MEMORY);
   bool pinned_memory = false;
-  if (!disablePinnedMemory && at::Context::hasCUDA() && at::getNumGPUs() > 0) {
+  if (at::Context::hasCUDA() && at::getNumGPUs() > 0) {
     pinned_memory = true;
   }
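
Note: with the gate removed, the CPU inference output is pinned whenever CUDA is visible, with no environment-variable opt-out. As a rough illustration of what the pinned_memory flag controls, here is a minimal sketch using generic ATen calls; this is not the template's actual allocation code, and the shape and dtype below are placeholders.

// Sketch only (assumed shape/dtype): the pinned_memory flag typically ends up
// in the TensorOptions used to allocate the op's CPU output, so the buffer
// can later be copied to the GPU asynchronously.
#include <ATen/ATen.h>

at::Tensor make_output(int64_t B, int64_t total_D, bool pinned_memory) {
  return at::empty(
      {B, total_D},
      at::TensorOptions().dtype(at::kByte).pinned_memory(pinned_memory));
}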

fbgemm_gpu/include/fbgemm_gpu/config/feature_gates.h

Lines changed: 1 addition & 9 deletions

@@ -62,8 +62,7 @@ namespace fbgemm_gpu::config {
   X(TBE_ROCM_INFERENCE_PACKED_BAGS) \
   X(TBE_ROCM_HIP_BACKWARD_KERNEL) \
   X(BOUNDS_CHECK_INDICES_V2) \
-  X(TBE_REPORT_INPUT_PARAMS) \
-  X(TBE_CPU_OUTPUT_DISABLE_PINNED_MEMORY)
+  X(TBE_REPORT_INPUT_PARAMS)
 // X(EXAMPLE_FEATURE_FLAG)
 
 /// @ingroup fbgemm-gpu-config
@@ -92,13 +91,6 @@ bool check_feature_gate_key(const std::string& key);
 /// is enabled.
 bool is_feature_enabled(const FeatureGateName& feature);
 
-/// @ingroup fbgemm-gpu-config
-///
-/// @brief For the given `FeatureGateName`, check if the corresponding inference
-/// feature is enabled in the env vars only. Only applicable for inference
-/// features suitable for env var rollouts
-bool is_feature_enabled_from_env(const FeatureGateName& feature);
-
 #ifdef FBGEMM_FBCODE
 bool is_feature_enabled(const FbFeatureGateName& feature);
 #endif
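
For reference, gates declared in this X-macro list are consulted through is_feature_enabled. A minimal usage sketch for one of the remaining gates follows; it is illustrative only and not code from this commit.

// Usage sketch (not from this commit): query a remaining feature gate.
#include "fbgemm_gpu/config/feature_gates.h"

void maybe_report_inputs() {
  const bool report_inputs = fbgemm_gpu::config::is_feature_enabled(
      fbgemm_gpu::config::FeatureGateName::TBE_REPORT_INPUT_PARAMS);
  if (report_inputs) {
    // ... report TBE input parameters (details omitted) ...
  }
}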

fbgemm_gpu/src/config/feature_gates.cpp

Lines changed: 13 additions & 24 deletions

@@ -45,50 +45,39 @@ bool ev_check_key(const std::string& key) {
   }
 }
 
-static bool check_feature_gate_key_impl(
-    const std::string& key,
-    bool check_env_vars_only) {
+DLL_PUBLIC bool check_feature_gate_key(const std::string& key) {
   // Cache feature flags to avoid repeated JK and env var checks
   static std::map<std::string, bool> feature_flags_cache;
-  if (const auto search = feature_flags_cache.find(key);
-      search != feature_flags_cache.end()) {
-    return search->second;
-  }
 #ifdef FBGEMM_FBCODE
-  const auto value =
-      check_env_vars_only ? ev_check_key(key) : jk_check_key(key);
-#else
-  const auto value = ev_check_key(key);
+  static const auto no_jk = ev_check_key("NO_JK");
 #endif
 
-  feature_flags_cache.insert({key, value});
-  return value;
-}
+  if (const auto search = feature_flags_cache.find(key);
+      search != feature_flags_cache.end()) {
+    return search->second;
 
-DLL_PUBLIC bool check_feature_gate_key(const std::string& key) {
+  } else {
+    const auto value =
 #ifdef FBGEMM_FBCODE
-  static const auto no_jk = ev_check_key("NO_JK");
+        (no_jk) ? ev_check_key(key) : jk_check_key(key);
 #else
-  static const auto no_jk = false;
+        ev_check_key(key);
 #endif
 
-  return check_feature_gate_key_impl(key, no_jk);
+    feature_flags_cache.insert({key, value});
+    return value;
+  }
 }
 
 DLL_PUBLIC bool is_feature_enabled(const FeatureGateName& feature) {
   return check_feature_gate_key(to_string(feature));
 }
 
-DLL_PUBLIC bool is_feature_enabled_from_env(const FeatureGateName& feature) {
-  return check_feature_gate_key_impl(
-      to_string(feature), /* check_env_vars_only */ true);
-}
-
 #ifdef FBGEMM_FBCODE
 DLL_PUBLIC bool is_feature_enabled(const FbFeatureGateName& feature) {
   return check_feature_gate_key(to_string(feature));
 }
-#endif // FBGEMM_FBCODE
+#endif
 
 } // namespace fbgemm_gpu::config
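
The restored check_feature_gate_key memoizes each flag in a function-local cache and, in fbcode builds, falls back from JustKnobs to environment variables when the NO_JK gate is set. The env-var convention used by ev_check_key is not shown in this diff; based on the FBGEMM_TBE_CPU_OUTPUT_DISABLE_PINNED_MEMORY variable referenced in the test file below, it presumably reads FBGEMM_<KEY>, roughly along these lines (a sketch under that assumption, not the real implementation):

// Hedged sketch of the presumed env-var convention; the actual ev_check_key
// implementation lives outside this diff.
#include <cstdlib>
#include <string>

bool ev_check_key_sketch(const std::string& key) {
  const std::string var = "FBGEMM_" + key;  // e.g. FBGEMM_NO_JK
  const char* value = std::getenv(var.c_str());
  return value != nullptr && std::string(value) == "1";
}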

fbgemm_gpu/test/tbe/inference/nbit_forward_test.py

Lines changed: 0 additions & 10 deletions

@@ -8,7 +8,6 @@
 # pyre-strict
 # pyre-ignore-all-errors[56]
 
-import os
 import random
 import unittest
 from typing import Any, Callable, Optional, Union
@@ -124,12 +123,6 @@
 
 @optests.generate_opcheck_tests(fast=True, additional_decorators=additional_decorators)
 class NBitFowardTest(NBitFowardTestCommon):
-    def _is_cpu_output_on_pinned_memory(self) -> bool:
-        return (
-            os.getenv("FBGEMM_TBE_CPU_OUTPUT_DISABLE_PINNED_MEMORY") != "1"
-            and torch.cuda.is_available()
-        )
-
     def execute_nbit_forward_fused_pooled_emb_quant_(
         self,
         T: int,
@@ -905,9 +898,6 @@ def test_nbit_forward_cpu_seq_int8(
         lengths = torch.cat(lengths_list, 0)
         offsets = torch.ops.fbgemm.asynchronous_complete_cumsum(lengths)
         quant_cc_output = quant_cc(indices.int(), offsets.int())
-        self.assertEqual(
-            quant_cc_output.is_pinned(), self._is_cpu_output_on_pinned_memory()
-        )
         tables_rows = [
             T for T, _, _ in quant_cc.split_embedding_weights_with_scale_bias(0)
         ]
