pytorch
diff --git a/‎README.md‎
Lines changed: 1 addition & 5 deletions b/‎README.md‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎distributed/FSDP/README.md‎
Lines changed: 6 additions & 2 deletions b/‎distributed/FSDP/README.md‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎distributed/FSDP/T5_training.py‎
Lines changed: 3 additions & 3 deletions b/‎distributed/FSDP/T5_training.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎distributed/FSDP/utils/environment.py‎
Lines changed: 0 additions & 2 deletions b/‎distributed/FSDP/utils/environment.py‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎distributed/FSDP2/README.md‎
Lines changed: 6 additions & 4 deletions b/‎distributed/FSDP2/README.md‎
Lines changed: 6 additions & 4 deletions
diff --git a/‎distributed/FSDP2/train.py‎ renamed to ‎distributed/FSDP2/example.py‎
Lines changed: 23 additions & 4 deletions b/‎distributed/FSDP2/train.py‎ renamed to ‎distributed/FSDP2/example.py‎
Lines changed: 23 additions & 4 deletions
diff --git a/‎distributed/FSDP2/requirements.txt‎
Lines changed: 2 additions & 0 deletions b/‎distributed/FSDP2/requirements.txt‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎distributed/FSDP2/run_example.sh‎
Lines changed: 11 additions & 0 deletions b/‎distributed/FSDP2/run_example.sh‎
Lines changed: 11 additions & 0 deletions
@@ -1,7 +1,5 @@
 # PyTorch Examples
 
-![Run Examples](https://github.com/pytorch/examples/workflows/Run%20Examples/badge.svg)
-
 https://pytorch.org/examples/
 
 `pytorch/examples` is a repository showcasing examples of using [PyTorch](https://github.com/pytorch/pytorch). The goal is to have curated, short, few/no dependencies _high quality_ examples that are substantially different from each other that can be emulated in your existing work.
@@ -21,7 +19,7 @@ https://pytorch.org/examples/
 - [Variational Auto-Encoders](./vae/README.md)
 - [Superresolution using an efficient sub-pixel convolutional neural network](./super_resolution/README.md)
 - [Hogwild training of shared ConvNets across multiple processes on MNIST](mnist_hogwild)
-- [Training a CartPole to balance in OpenAI Gym with actor-critic](./reinforcement_learning/README.md)
+- [Training a CartPole to balance with actor-critic](./reinforcement_learning/README.md)
 - [Natural Language Inference (SNLI) with GloVe vectors, LSTMs, and torchtext](snli)
 - [Time sequence prediction - use an LSTM to learn Sine waves](./time_sequence_prediction/README.md)
 - [Implement the Neural Style Transfer algorithm on images](./fast_neural_style/README.md)
@@ -32,8 +30,6 @@ https://pytorch.org/examples/
 - [Image Classification Using Forward-Forward](./mnist_forward_forward/README.md)
 - [Language Translation using Transformers](./language_translation/README.md)
 
-
-
 Additionally, a list of good examples hosted in their own repositories:
 
 - [Neural Machine Translation using sequence-to-sequence RNN with attention (OpenNMT)](https://github.com/OpenNMT/OpenNMT-py)
 
@@ -1,6 +1,10 @@
-## FSDP T5
+Note: FSDP1 is deprecated. Please follow the [FSDP2 tutorial](https://docs.pytorch.org/tutorials/intermediate/FSDP_tutorial.html) and the [code examples](https://github.com/pytorch/examples/tree/main/distributed/FSDP2) instead.
 
-To run the T5 example with FSDP for text summarization:
+## FSDP1 T5
+
+
+
+To run the T5 example with FSDP1 for text summarization:
 
 ## Get the wikihow dataset
 ```bash
 
@@ -199,11 +199,11 @@ def fsdp_main(args):
     # Training settings
     parser = argparse.ArgumentParser(description='PyTorch T5 FSDP Example')
     parser.add_argument('--batch-size', type=int, default=4, metavar='N',
-                        help='input batch size for training (default: 64)')
+                        help='input batch size for training (default: 4)')
     parser.add_argument('--test-batch-size', type=int, default=4, metavar='N',
-                        help='input batch size for testing (default: 1000)')
+                        help='input batch size for testing (default: 4)')
     parser.add_argument('--epochs', type=int, default=2, metavar='N',
-                        help='number of epochs to train (default: 3)')
+                        help='number of epochs to train (default: 2)')
     parser.add_argument('--seed', type=int, default=1, metavar='S',
                         help='random seed (default: 1)')
     parser.add_argument('--track_memory', action='store_false', default=True,
 
@@ -1,8 +1,6 @@
 # Copyright (c) 2022 Meta Platforms, Inc. and its affiliates.
 # All rights reserved.
 #
-# This source code is licensed under the Apache-style license found in the
-# LICENSE file in the root directory of this source tree.
 
 # This is a simple check to confirm that your current server has full bfloat support -
 #  both GPU native support, and Network communication support.
 
@@ -1,25 +1,27 @@
 ## FSDP2
 To run FSDP2 on transformer model:
+
 ```
 cd distributed/FSDP2
-torchrun --nproc_per_node 2 train.py
+pip install -r requirements.txt
+torchrun --nproc_per_node 2 example.py
 ```
 * For 1st time, it creates a "checkpoints" folder and saves state dicts there
 * For 2nd time, it loads from previous checkpoints
 
 To enable explicit prefetching
 ```
-torchrun --nproc_per_node 2 train.py --explicit-prefetch
+torchrun --nproc_per_node 2 example.py --explicit-prefetch
 ```
 
 To enable mixed precision
 ```
-torchrun --nproc_per_node 2 train.py --mixed-precision
+torchrun --nproc_per_node 2 example.py --mixed-precision
 ```
 
 To showcase DCP API
 ```
-torchrun --nproc_per_node 2 train.py --dcp-api
+torchrun --nproc_per_node 2 example.py --dcp-api
 ```
 
 ## Ensure you are running a recent version of PyTorch:
 
@@ -7,6 +7,11 @@
 from torch.distributed.fsdp import fully_shard, MixedPrecisionPolicy
 from utils import inspect_mixed_precision, inspect_model
 
+def verify_min_gpu_count(min_gpus: int = 2) -> bool:
+    """Check that enough accelerator devices are present to run this example.
+
+    Args:
+        min_gpus: Minimum number of devices required (defaults to 2).
+
+    Returns:
+        True when ``torch.accelerator.is_available()`` is true and
+        ``torch.accelerator.device_count()`` is at least ``min_gpus``;
+        False otherwise.
+    """
+    has_gpu = torch.accelerator.is_available()
+    gpu_count = torch.accelerator.device_count()
+    return has_gpu and gpu_count >= min_gpus
 
 def set_modules_to_forward_prefetch(model, num_to_forward_prefetch):
     for i, layer in enumerate(model.layers):
@@ -29,10 +34,23 @@ def set_modules_to_backward_prefetch(model, num_to_backward_prefetch):
 
 
 def main(args):
+    _min_gpu_count = 2
+    if not verify_min_gpu_count(min_gpus=_min_gpu_count):
+        print(f"Unable to locate sufficient {_min_gpu_count} gpus to run this example. Exiting.")
+        exit()
     rank = int(os.environ["LOCAL_RANK"])
-    device = torch.device(f"cuda:{rank}")
-    torch.cuda.set_device(device)
-    torch.distributed.init_process_group(backend="nccl", device_id=device)
+    if torch.accelerator.is_available():
+        device_type = torch.accelerator.current_accelerator()
+        device = torch.device(f"{device_type}:{rank}")
+        torch.accelerator.device_index(rank)
+        print(f"Running on rank {rank} on device {device}")
+    else:
+        device = torch.device("cpu")
+        print(f"Running on device {device}")
+
+    backend = torch.distributed.get_default_backend_for_device(device)
+    torch.distributed.init_process_group(backend=backend, device_id=device)
+
     torch.manual_seed(0)
     vocab_size = 1024
     batch_size = 32
@@ -64,7 +82,7 @@ def main(args):
 
     checkpointer = Checkpointer("checkpoints", dcp_api=args.dcp_api)
     if checkpointer.last_training_time is None:
-        model.to_empty(device="cuda")
+        model.to_empty(device=device)
         model.reset_parameters()
     else:
         checkpointer.load_model(model)
@@ -96,4 +114,5 @@ def main(args):
     parser.add_argument("--mixed-precision", action="store_true", default=False)
     parser.add_argument("--dcp-api", action="store_true", default=False)
     args = parser.parse_args()
+    
     main(args)
@@ -0,0 +1,2 @@
+torch>=2.7
+numpy
@@ -0,0 +1,11 @@
+#!/bin/bash
+# bash run_example.sh {file_to_run.py} {num_gpus}
+# where file_to_run = example to run. Default = 'example.py'
+# num_gpus = num local gpus to use (must be at least 2). Default = 4
+
+# samples to run include:
+# example.py
+
+echo "Launching ${1:-example.py} with ${2:-4} gpus"
+torchrun --nnodes=1 --nproc_per_node=${2:-4} ${1:-example.py}
+
Original file line number	Diff line number	Diff line change
`@@ -1,8 +1,6 @@`
`1`	`1`	`# Copyright (c) 2022 Meta Platforms, Inc. and its affiliates.`
`2`	`2`	`# All rights reserved.`
`3`	`3`	`#`
`4`		`-# This source code is licensed under the Apache-style license found in the`
`5`		`-# LICENSE file in the root directory of this source tree.`
`6`	`4`
`7`	`5`	`# This is a simple check to confirm that your current server has full bfloat support -`
`8`	`6`	`# both GPU native support, and Network communication support.`