
Commit 8ff2ee6

Remove CUDA from benchmarks directory. (#9610)
This PR removes the CUDA-specific code from the `benchmarks` directory. This is in line with the CUDA deprecation that started on release 2.8.

**Key Changes:**

- Removed the `keep_model_data_on_cuda` parameter
  - It was used in combination with the zero-overhead CUDA to XLA:CUDA data movement, removed in [#9598][1] and [#9603][2]
- Deleted `llama.py`, `nightly.sh`, `run_benchmark.sh`, `run_single_graph_bm.sh`, and `run_top_tier_bm.sh`
  - All of them ran benchmarks specifically comparing PyTorch Inductor with XLA:CUDA

[1]: #9598
[2]: #9603
1 parent: ddf75a1

15 files changed: +62 -838 lines

benchmarks/README.md

Lines changed: 5 additions & 5 deletions

````diff
@@ -77,7 +77,7 @@ Disable autoboost selecting clock rate based on thermal, and power budget effect
 Run the `experiment_runner.py` from the `pytorch` directory, which should be the
 parent of the `xla` directory.
 
-The following example runs the alexnet benchmark on GPU through the
+The following example runs the alexnet benchmark on CPU through the
 Pytorch/XLA-dynamo path and through the Inductor-dynamo with 5 repetitions each.
 The results will be stored in a json file (eg results.jsonl) in `experiment_results`.
 
@@ -88,7 +88,7 @@ python xla/benchmarks/experiment_runner.py \
   --xla=PJRT --xla=None \
   --test=eval --test=train \
   --suite-name=torchbench \
-  --accelerator=cuda \
+  --accelerator=cpu \
   --output-dirname=experiment_results \
   --repeat=5 \
   --print-subprocess \
@@ -118,7 +118,7 @@ python xla/benchmarks/experiment_runner.py \
   --suite-name=torchbench \
   --progress-bar \
   --model-config='{"model_name":"BERT_pytorch"}' \
-  --experiment-config='{"accelerator":"cuda","xla":"PJRT","xla_flags":null,"dynamo":"openxla","torch_xla2":null,"test":"train","keep_model_data_on_cuda":false,"enable_functionalization":false}' \
+  --experiment-config='{"accelerator":"cpu","xla":"PJRT","xla_flags":null,"dynamo":"openxla","torch_xla2":null,"test":"train","enable_functionalization":false}' \
   --repeat 1
 ```
 
@@ -135,13 +135,13 @@ works only for inference now.
 
 ```
 cd pytorch
-PJRT_DEVICE=CUDA python3 new_xla/benchmarks/experiment_runner.py \
+PJRT_DEVICE=CPU python3 new_xla/benchmarks/experiment_runner.py \
   --xla=PJRT \
   --dynamo=openxla \
   --test=eval \
   --filter=BERT_pytorch$ \
   --suite-name=torchbench \
-  --accelerator=cuda \
+  --accelerator=cpu \
   --progress-bar \
   --output-dirname=/tmp/output \
   --repeat=2 \
````
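As a usage note, the runner writes its results to the `results.jsonl` file mentioned above, one JSON record per line. A minimal post-processing sketch, assuming only the JSON Lines format and the output path from the README (the record schema is not specified in this diff, so we just list each record's top-level keys):

```python
# Minimal sketch: inspect results.jsonl produced by experiment_runner.py.
# Only the file location and JSON Lines format are taken from the README;
# the record schema is an unknown, so print whatever keys are present.
import json

with open("experiment_results/results.jsonl") as f:
  for line in f:
    record = json.loads(line)
    print(sorted(record.keys()))
```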

benchmarks/benchmark_experiment.py

Lines changed: 7 additions & 20 deletions

```diff
@@ -20,13 +20,12 @@ def list_experiment_configs(self):
 
     # Start with default config.
     config_choices = {
-        "accelerator": ["cpu", "cuda", "tpu"],
+        "accelerator": ["cpu", "tpu"],
         "xla": [None, "PJRT", "XRT"],
         "xla_flags": [None],
         "dynamo": [None, "inductor", "openxla"],
         "torch_xla2": [None],  # options only apply to torch_xla2
         "test": ["eval", "train"],
-        "keep_model_data_on_cuda": [False],
         "enable_functionalization": [False],
     }
 
@@ -46,10 +45,6 @@ def list_experiment_configs(self):
     if self._args.xla_flags:
       config_choices["xla_flags"] = list(
           map(parse_none_str, set(self._args.xla_flags)))
-    if self._args.keep_model_data_on_cuda:
-      config_choices["keep_model_data_on_cuda"] = [
-          self._args.keep_model_data_on_cuda
-      ]
     if self._args.enable_functionalization:
       config_choices["enable_functionalization"] = [
           self._args.enable_functionalization
@@ -85,7 +80,6 @@ def _is_available(self,
     cfg_xla = experiment_config["xla"]
     cfg_test = experiment_config["test"]
     cfg_torch_xla2 = experiment_config["torch_xla2"]
-    cfg_keep_model_data_on_cuda = experiment_config["keep_model_data_on_cuda"]
 
     # Check that dynamo refers to an existing backend.
     if cfg_dynamo is not None and cfg_dynamo not in dynamo.list_backends(
@@ -118,16 +112,16 @@ def _is_available(self,
     if cfg_accelerator == "tpu":
       if cfg_xla is None:
         return False
-    elif cfg_accelerator in ("cpu", "cuda"):
+    elif cfg_accelerator == "cpu":
       if cfg_xla == "XRT":
         return False
+    elif cfg_accelerator == "cuda":
+      if cfg_xla is not None:
+        # PyTorch/XLA with CUDA backend is no longer supported.
+        return False
     else:
       raise NotImplementedError
 
-    # cfg_keep_model_data_on_cuda is only avaible when using dynamo
-    if cfg_keep_model_data_on_cuda and cfg_dynamo != "openxla":
-      return False
-
     return True
 
   def load_experiment(self,
@@ -140,15 +134,13 @@ def load_experiment(self,
     test = experiment_config["test"]
     batch_size = experiment_config.get("batch_size", self._args.batch_size)
     torch_xla2 = experiment_config["torch_xla2"]
-    keep_model_data_on_cuda = experiment_config["keep_model_data_on_cuda"]
     enable_functionalization = experiment_config["enable_functionalization"]
     return BenchmarkExperiment(
         accelerator=accelerator,
         xla=xla,
         xla_flags=xla_flags,
         dynamo=dynamo,
         torch_xla2=torch_xla2,
-        keep_model_data_on_cuda=keep_model_data_on_cuda,
         test=test,
         batch_size=batch_size,
         enable_functionalization=enable_functionalization,
@@ -159,14 +151,12 @@ class BenchmarkExperiment:
 
   def __init__(self, accelerator: str, xla: Optional[str],
                xla_flags: Optional[str], dynamo: str, torch_xla2: bool,
-               keep_model_data_on_cuda: bool, test: str, batch_size: str,
-               enable_functionalization: bool):
+               test: str, batch_size: str, enable_functionalization: bool):
     self.accelerator = accelerator
     self.xla = xla
     self.xla_flags = xla_flags
     self.dynamo = dynamo
     self.torch_xla2 = torch_xla2
-    self.keep_model_data_on_cuda = keep_model_data_on_cuda
     self.test = test
     self.batch_size = batch_size
     self.accelerator_model = get_accelerator_model(self.accelerator)
@@ -191,8 +181,6 @@ def update_process_env(self, process_env: Dict[str, str]):
       if is_xla_device_available("TPU"):
         process_env["TPU_NUM_DEVICES"] = "1"
         process_env["XRT_TPU_CONFIG"] = "localservice;0;localhost:51011"
-      elif is_xla_device_available("CUDA"):
-        process_env["GPU_NUM_DEVICES"] = "1"
     elif self.xla is None:
       # In non-xla CPU training experiments, an env var is still needed if an
       # xla device exists, or there will be "Missing XLA configuration" error.
@@ -246,7 +234,6 @@ def to_dict(self):
     d["xla_flags"] = self.xla_flags
     d["dynamo"] = self.dynamo
     d["torch_xla2"] = self.torch_xla2
-    d["keep_model_data_on_cuda"] = self.keep_model_data_on_cuda
     d["test"] = self.test
     d["batch_size"] = self.batch_size
     d["enable_functionalization"] = self.enable_functionalization
```

benchmarks/benchmark_model.py

Lines changed: 1 addition & 10 deletions

```diff
@@ -103,7 +103,6 @@ def prepare_for_experiment(
     else:
       raise NotImplementedError
 
-    keep_model_data_on_cuda = self.benchmark_experiment.keep_model_data_on_cuda
     if self.benchmark_experiment.torch_xla2:
       import torch_xla2.export
       import torch_xla2
@@ -125,7 +124,7 @@ def prepare_for_experiment(
       self.module = lambda *x: jax_func(weights, x)
       self.example_inputs = move_to_device(
           self.example_inputs, device, torch_xla2=True)
-    elif not keep_model_data_on_cuda:
+    else:
       self.module = self.module.to(self.device)
       self.example_inputs = move_to_device(
           self.example_inputs, self.device, torch_xla2=False)
@@ -137,14 +136,6 @@ def prepare_for_experiment(
       logger.info(f"Running torch.compile with opts {compilation_opts}")
       self.model_iter_fn = torch.compile(self.model_iter_fn, **compilation_opts)
 
-    if keep_model_data_on_cuda:
-
-      def assert_func(t):
-        assert t.device.type.lower(
-        ) == 'cuda', 'When keep_model_data_on_cuda is set, the input data should remain on the CUDA device.'
-
-      pytree.tree_map_only(torch.Tensor, assert_func, self.example_inputs)
-
   def pick_grad(self):
     if self.benchmark_experiment.test == "eval":
       return torch.no_grad()
```
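The deleted assertion block used `torch.utils._pytree.tree_map_only` to apply a device check to every tensor in the (possibly nested) example inputs. A small self-contained sketch of that traversal pattern, independent of this commit (the input structure here is made up for illustration):

```python
import torch
import torch.utils._pytree as pytree

# Hypothetical nested inputs, for illustration only.
inputs = {"ids": torch.zeros(2, 3), "extras": [torch.ones(1)]}

def check_device(t):
  # Mirrors the removed assert, inverted: with the CUDA path gone,
  # benchmark inputs should not be sitting on a CUDA device.
  assert t.device.type != "cuda", "unexpected CUDA tensor"

# Applies check_device to every torch.Tensor leaf, skipping other leaves.
pytree.tree_map_only(torch.Tensor, check_device, inputs)
```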

benchmarks/experiment_runner.py

Lines changed: 0 additions & 5 deletions

```diff
@@ -936,11 +936,6 @@ def __str__(self):
       help="""Collect CUDA and CPU times per operation. This will also gather
       CPU fallbacks.""",
   )
-  parser.add_argument(
-      "--keep-model-data-on-cuda",
-      action="store_true",
-      help="""Whether to keep the model and data on CUDA and not to move to an XLA device. This is to be used with PyTorch/XLA dynamo. When set, PyTorch/XLA dynamo bridge move the model and data to the XLA device.""",
-  )
   parser.add_argument(
       "--xla-flags",
       type=str,
```
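One practical consequence of deleting the argument: any wrapper script that still passes `--keep-model-data-on-cuda` now fails argparse validation up front. A tiny sketch of that failure mode (standard argparse behavior, not code from the repo):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--xla-flags", type=str)  # argument that survives the diff

# Passing the removed flag now exits with:
#   error: unrecognized arguments: --keep-model-data-on-cuda
try:
  parser.parse_args(["--keep-model-data-on-cuda"])
except SystemExit:
  print("flag was removed in #9610")
```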
