Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions .meta/mast/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Forge MAST Environment Setup

A simple setup script to automatically configure your environment for running Forge with MAST jobs.
This only applies to Meta internal users.

## Quick Start

⚠️ Important Note: the setup script will clone the forge repository under "/data/users/$USER".

### 1. Run the Setup Script

The `env_setup.sh` script will automatically:
- ✅ Activate and configure the required conda environment
- ✅ Clone/update the Forge repository
- ✅ Install Forge package dependencies
- ✅ Mount the required oilfs workspace to `/mnt/wsfuse`
- ✅ Configure your environment for MAST job submission

```bash
# Make the script executable
chmod +x .meta/mast/env_setup.sh

# Run the setup
./.meta/mast/env_setup.sh

```

### 2. Submit a MAST job

Use the launch script to submit a MAST job:

```bash
# Make the launch script executable (first time only)
chmod +x .meta/mast/launch.sh

# Launch a job with your desired config
./.meta/mast/launch.sh .meta/mast/qwen3_1_7b_mast.yaml
```

The launch script will automatically:
- Navigate to the forge root directory
- Reinstall the forge package with your latest changes
- Set the correct PYTHONPATH
- Launch the MAST job with the specified config

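You can run it from any directory: the script locates the Forge root from its own location. Note that the config path you pass is resolved relative to the Forge root, not your current directory.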
File renamed without changes.
80 changes: 74 additions & 6 deletions apps/mast/env_setup.sh → .meta/mast/env_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
# setup_forge_env.sh - Setup conda environment and install forge with mounting
set -e # Exit on any error

# Configuration
CONDA_ENV_NAME="forge:stable"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
Expand Down Expand Up @@ -45,6 +48,7 @@ mount_workspace() {
log_info "Creating mount directory: $mount_dir"
# Create the mount point; on failure, report why and return non-zero to the caller.
sudo mkdir -p "$mount_dir" || {
  log_error "Failed to create mount directory (may need sudo privileges)"
  # The backticks are escaped on purpose: unescaped backticks inside double
  # quotes are command substitution, which would *execute* `sudo umount`
  # on this error path instead of printing the suggestion.
  log_error "You could alternatively try to unmount with \`sudo umount /mnt/wsfuse\`"
  return 1
}
fi
Expand Down Expand Up @@ -130,10 +134,10 @@ if [ ! -f "$CONDA_SCRIPT_PATH" ]; then
fi

# Activate the conda environment by sourcing the conda script with `activate <env>`.
log_info "Sourcing conda script: $CONDA_SCRIPT_PATH"
# NOTE(review): this diff view shows two activate lines back to back; presumably
# the first (hard-coded env name) is the line being replaced and the second
# (using $CONDA_ENV_NAME) is its replacement. Confirm against the actual file.
source "$CONDA_SCRIPT_PATH" activate forge:e146614
source "$CONDA_SCRIPT_PATH" activate "$CONDA_ENV_NAME"

# NOTE(review): `set -e` is enabled near the top of this script, so a failing
# `source` would presumably abort the script before this `$?` check is reached.
# Confirm whether this branch is reachable.
if [ $? -ne 0 ]; then
log_error "Failed to activate conda environment forge-e146614"
log_error "Failed to activate conda environment $CONDA_ENV_NAME"
exit 1
fi

Expand Down Expand Up @@ -191,8 +195,72 @@ fi

log_info "Current directory: $(pwd)"

# Step 5: Install forge package
log_info "Step 5: Installing forge package..."
# Step 5: Install torchtitan
log_info "Step 5: Installing torchtitan..."

# Install torchtitan at the commit pinned in assets/versions.sh.
# Reads:  FORGE_REPO_DIR (set earlier in this script).
# Exits:  1 if versions.sh is missing, TORCHTITAN_COMMIT is empty, or pip fails.
VERSIONS_FILE="$FORGE_REPO_DIR/assets/versions.sh"
if [ ! -f "$VERSIONS_FILE" ]; then
  log_error "versions.sh not found at: $VERSIONS_FILE"
  log_error "Cannot proceed without version information"
  exit 1
fi

log_info "Sourcing version information from: $VERSIONS_FILE"
source "$VERSIONS_FILE"

if [ -z "${TORCHTITAN_COMMIT:-}" ]; then
  log_error "TORCHTITAN_COMMIT not found in versions.sh"
  exit 1
fi

log_info "Installing torchtitan from commit: $TORCHTITAN_COMMIT"
pip uninstall -y torchtitan

# Test pip directly instead of inspecting $? afterwards: with `set -e` active,
# a failing standalone `pip install` would abort the script before any `$?`
# check could run, so the error message below would never be printed.
if pip install "git+https://github.com/pytorch/torchtitan.git@$TORCHTITAN_COMMIT"; then
  log_info "Torchtitan installed successfully"
else
  log_error "Failed to install torchtitan"
  exit 1
fi

# Step 5.5: Apply monarch torch import hack
# Injects `import torch` into the installed monarch/__init__.py, right after
# the line `from typing import TYPE_CHECKING`. The step is idempotent: a marker
# comment on the injected line is used to detect a previous run.
# Reads: CONDA_PREFIX. Note the python3.10 site-packages path is hard-coded.
log_info "Step 5.5: Applying monarch torch import hack..."

MONARCH_INIT="$CONDA_PREFIX/lib/python3.10/site-packages/monarch/__init__.py"
if [ ! -f "$MONARCH_INIT" ]; then
  log_warn "monarch/__init__.py not found at: $MONARCH_INIT"
  log_warn "Skipping monarch torch import hack (monarch may not be installed yet)"
elif grep -q "^import torch # Injected by forge setup" "$MONARCH_INIT"; then
  # Marker already present: a previous run applied the hack.
  log_info "Monarch torch import hack already applied, skipping"
else
  log_info "Injecting 'import torch' into monarch/__init__.py"

  # Create a backup
  cp "$MONARCH_INIT" "$MONARCH_INIT.bak"

  # Run sed as the `if` condition so that, under `set -e`, a sed failure
  # reaches the restore branch instead of aborting the script first.
  # The appended text lines stay at column 0 so the injected Python is unindented.
  if ! sed -i '/^from typing import TYPE_CHECKING$/a\
\
# Torch must be imported before monarch (injected by forge setup)\
import torch # Injected by forge setup' "$MONARCH_INIT"; then
    log_error "Failed to inject torch import, restoring backup"
    mv "$MONARCH_INIT.bak" "$MONARCH_INIT"
    exit 1
  fi

  # sed exits 0 even when the anchor line never matched, so confirm the
  # marker is really there before claiming success.
  if grep -q "^import torch # Injected by forge setup" "$MONARCH_INIT"; then
    log_info "Successfully injected torch import into monarch/__init__.py"
  else
    log_warn "Anchor line 'from typing import TYPE_CHECKING' not found in $MONARCH_INIT; torch import was NOT injected"
  fi
fi

# Step 6: Install forge package
log_info "Step 6: Installing forge package..."
pip install --no-deps --force-reinstall .
if [ $? -ne 0 ]; then
log_error "Failed to install forge package"
Expand Down Expand Up @@ -234,5 +302,5 @@ log_info "Mounted workspace available at: /mnt/wsfuse"
echo ""
log_info "Installation completed successfully!"
echo ""
log_info "Re-activate the conda environment to make the changes take effect:"
log_info "conda deactivate && conda activate forge-e146614"
log_info "Test that this is working locally with:"
log_info "python -m apps.grpo.main --config=apps/grpo/qwen3_1_7b.yaml"
68 changes: 68 additions & 0 deletions .meta/mast/launch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# launch.sh - Launch MAST jobs with Forge
set -e # Exit on any error

# ANSI color sequences for log prefixes. They are stored unexpanded (literal
# backslash sequences); the logging helpers interpret them at print time.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
NC="\033[0m" # Reset to the terminal's default color

# Logging functions
# Print an informational message to stdout with a green "[INFO]" prefix.
# Arguments: $1 - message text (backslash escapes in it are interpreted).
log_info() {
  local message=$1
  # %b expands backslash escapes the same way `echo -e` does.
  printf '%b\n' "${GREEN}[INFO]${NC} ${message}"
}

# Print an error message to stdout with a red "[ERROR]" prefix.
# Arguments: $1 - message text (backslash escapes in it are interpreted).
log_error() {
  local message=$1
  # %b expands backslash escapes the same way `echo -e` does.
  printf '%b\n' "${RED}[ERROR]${NC} ${message}"
}

# Main flow: validate the config argument, move to the forge root, reinstall
# the forge package, then launch the MAST job.
# Arguments: $1 - config file path, interpreted relative to the forge root.
# Exits:     1 on missing argument, missing config file, or failed reinstall.

# Check if config file is provided
if [ $# -eq 0 ]; then
  log_error "No config file provided"
  echo "Usage: $0 <config_file>"
  echo "Example: $0 .meta/mast/qwen3_1_7b_mast.yaml"
  exit 1
fi

CONFIG_FILE="$1"

# Get the directory where this script is located
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Navigate to forge root (two levels up from .meta/mast/)
FORGE_ROOT="$( cd "$SCRIPT_DIR/../.." && pwd )"

log_info "Forge root directory: $FORGE_ROOT"
log_info "Config file: $CONFIG_FILE"

# Check if config file exists (resolved against the forge root, not the caller's cwd)
if [ ! -f "$FORGE_ROOT/$CONFIG_FILE" ]; then
  log_error "Config file not found: $FORGE_ROOT/$CONFIG_FILE"
  exit 1
fi

# Navigate to forge root
cd "$FORGE_ROOT"
log_info "Changed to directory: $(pwd)"

# Reinstall forge package.
# pip is tested directly rather than via a later `$?` check: with `set -e`
# active, a failing standalone command aborts the script immediately, so the
# error message below would otherwise never be printed.
log_info "Reinstalling forge package..."
if ! pip install --force-reinstall --no-deps .; then
  log_error "Failed to reinstall forge package"
  exit 1
fi

log_info "Successfully reinstalled forge package"

# Launch the job (cwd is the forge root, so PYTHONPATH=. points at it)
log_info "Launching MAST job..."
PYTHONPATH=. python .meta/mast/main.py --config "$CONFIG_FILE"
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Grouped Relative Policy Optimization (GRPO)
# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_14b_mast.yaml

# Global configuration
group_size: 8
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Grouped Relative Policy Optimization (GRPO)
# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_1_7b_mast.yaml

# Global configuration
group_size: 8
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Grouped Relative Policy Optimization (GRPO)
# >>> python -m apps.mast.main --config apps/mast/qwen3_1_7b_mast.yaml
# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_32b_mast.yaml

# Global configuration
group_size: 8
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Grouped Relative Policy Optimization (GRPO)
# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_4b_mast.yaml

# Global configuration
group_size: 8
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Grouped Relative Policy Optimization (GRPO)
# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_8b_mast.yaml

# Global configuration
group_size: 8
Expand Down
33 changes: 0 additions & 33 deletions apps/mast/README.md

This file was deleted.

Loading