Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions apps/mast/README.md → .meta/mast/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Forge MAST Environment Setup

A simple setup script to automatically configure your environment for running Forge with MAST jobs.
This only applies to Meta internal users.

## Quick Start

Expand All @@ -9,7 +10,7 @@ A simple setup script to automatically configure your environment for running Fo
### 1. Run the Setup Script

The `env_setup.sh` script will automatically:
- ✅ Activate the required conda environment (`forge-8448524`)
- ✅ Activate and configure the required conda environment
- ✅ Clone/update the Forge repository
- ✅ Install Forge package dependencies
- ✅ Mount the required oilfs workspace to `/mnt/wsfuse`
Expand All @@ -20,14 +21,14 @@ The `env_setup.sh` script will automatically:
chmod +x env_setup.sh

# Run the setup
./apps/mast/env_setup.sh
./.meta/mast/env_setup.sh

```

### 2. Submit MAST job

```
pip install --force-reinstall --no-deps . && python -m apps.mast.main --config apps/mast/qwen3_1_7b_mast.yaml
pip install --force-reinstall --no-deps . && python -m .meta.mast.main --config .meta/mast/qwen3_1_7b_mast.yaml
```

⚠️ Important Note: `pip install --force-reinstall --no-deps .` is required every time you make a change to the local codebase. This ensures your latest changes are installed before job submission.
File renamed without changes.
80 changes: 74 additions & 6 deletions apps/mast/env_setup.sh → .meta/mast/env_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
# setup_forge_env.sh - Setup conda environment and install forge with mounting
set -e # Exit on any error

# Configuration
CONDA_ENV_NAME="forge:stable"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
Expand Down Expand Up @@ -45,6 +48,7 @@ mount_workspace() {
log_info "Creating mount directory: $mount_dir"
# Create the mount point; on failure, report why and abort the mount attempt.
sudo mkdir -p "$mount_dir" || {
  log_error "Failed to create mount directory (may need sudo privileges)"
  # The backticks are escaped so the hint is printed literally. Unescaped
  # backticks inside double quotes are command substitution and would
  # actually execute the umount while building the message.
  log_error "You could alternatively try to unmount with \`sudo umount /mnt/wsfuse\`"
  return 1
}
fi
Expand Down Expand Up @@ -130,10 +134,10 @@ if [ ! -f "$CONDA_SCRIPT_PATH" ]; then
fi

log_info "Sourcing conda script: $CONDA_SCRIPT_PATH"
source "$CONDA_SCRIPT_PATH" activate forge:e146614
source "$CONDA_SCRIPT_PATH" activate "$CONDA_ENV_NAME"

if [ $? -ne 0 ]; then
log_error "Failed to activate conda environment forge-e146614"
log_error "Failed to activate conda environment $CONDA_ENV_NAME"
exit 1
fi

Expand Down Expand Up @@ -191,8 +195,72 @@ fi

log_info "Current directory: $(pwd)"

# Step 5: Install forge package
log_info "Step 5: Installing forge package..."
# Step 5: Install torchtitan
# Installs torchtitan at the commit pinned by TORCHTITAN_COMMIT, which is read
# from $FORGE_REPO_DIR/assets/versions.sh. The setup aborts when that file or
# the pin is missing, or when the install itself fails.
log_info "Step 5: Installing torchtitan..."

# Source versions.sh to get the pinned commit
VERSIONS_FILE="$FORGE_REPO_DIR/assets/versions.sh"
if [ ! -f "$VERSIONS_FILE" ]; then
  log_error "versions.sh not found at: $VERSIONS_FILE"
  log_error "Cannot proceed without version information"
  exit 1
fi

log_info "Sourcing version information from: $VERSIONS_FILE"
source "$VERSIONS_FILE"

if [ -z "${TORCHTITAN_COMMIT:-}" ]; then
  log_error "TORCHTITAN_COMMIT not found in versions.sh"
  exit 1
fi

log_info "Installing torchtitan from commit: $TORCHTITAN_COMMIT"
pip uninstall -y torchtitan

# Test pip directly in the `if`: with `set -e` active, a bare failing command
# terminates the script before a later `$?` check can run, so the error
# message below would never be printed.
if pip install "git+https://github.com/pytorch/torchtitan.git@$TORCHTITAN_COMMIT"; then
  log_info "Torchtitan installed successfully"
else
  log_error "Failed to install torchtitan"
  exit 1
fi

# Step 5.5: Apply monarch torch import hack
# Injects `import torch` into the installed monarch/__init__.py, right after
# its `from typing import TYPE_CHECKING` line. The step is skipped when the
# marker line is already present, and only warns when monarch is not installed.
log_info "Step 5.5: Applying monarch torch import hack..."

# Start from the python3.10 location, then fall back to whichever python3.*
# site-packages in the active environment actually contains monarch, so the
# hack is not silently skipped on other Python minor versions.
MONARCH_INIT="$CONDA_PREFIX/lib/python3.10/site-packages/monarch/__init__.py"
if [ ! -f "$MONARCH_INIT" ]; then
  for monarch_init_candidate in "$CONDA_PREFIX"/lib/python3.*/site-packages/monarch/__init__.py; do
    # An unmatched glob stays literal, so confirm the file really exists.
    if [ -f "$monarch_init_candidate" ]; then
      MONARCH_INIT="$monarch_init_candidate"
      break
    fi
  done
fi

if [ -f "$MONARCH_INIT" ]; then
  # Check if we already applied the hack
  if grep -q "^import torch # Injected by forge setup" "$MONARCH_INIT"; then
    log_info "Monarch torch import hack already applied, skipping"
  else
    log_info "Injecting 'import torch' into monarch/__init__.py"

    # Create a backup
    cp "$MONARCH_INIT" "$MONARCH_INIT.bak"

    # Use sed to append a blank line, an explanatory comment and 'import torch'
    # right after "from typing import TYPE_CHECKING".
    # sed exits 0 even when the anchor line is absent, so the marker is grepped
    # afterwards to confirm the edit really happened. Both commands sit inside
    # the `if` condition so `set -e` cannot exit before the backup is restored.
    # The appended text lines must stay at column 0: they are written verbatim
    # into the Python file.
    if sed -i '/^from typing import TYPE_CHECKING$/a\
\
# Torch must be imported before monarch (injected by forge setup)\
import torch # Injected by forge setup' "$MONARCH_INIT" \
      && grep -q "^import torch # Injected by forge setup" "$MONARCH_INIT"; then
      log_info "Successfully injected torch import into monarch/__init__.py"
    else
      log_error "Failed to inject torch import, restoring backup"
      mv "$MONARCH_INIT.bak" "$MONARCH_INIT"
      exit 1
    fi
  fi
else
  log_warn "monarch/__init__.py not found at: $MONARCH_INIT"
  log_warn "Skipping monarch torch import hack (monarch may not be installed yet)"
fi

# Step 6: Install forge package
log_info "Step 6: Installing forge package..."
pip install --no-deps --force-reinstall .
if [ $? -ne 0 ]; then
log_error "Failed to install forge package"
Expand Down Expand Up @@ -234,5 +302,5 @@ log_info "Mounted workspace available at: /mnt/wsfuse"
echo ""
log_info "Installation completed successfully!"
echo ""
log_info "Re-activate the conda environment to make the changes take effect:"
log_info "conda deactivate && conda activate forge-e146614"
log_info "Test that this is working locally with:"
log_info "python -m apps.grpo.main --config=apps/grpo/qwen3_1_7b.yaml"
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Group Relative Policy Optimization (GRPO)
# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
# >>> python -m .meta.mast.main --config .meta/mast/qwen3_14b_mast.yaml

# Global configuration
group_size: 8
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Group Relative Policy Optimization (GRPO)
# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
# >>> python -m .meta.mast.main --config .meta/mast/qwen3_1_7b_mast.yaml
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yay


# Global configuration
group_size: 8
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Group Relative Policy Optimization (GRPO)
# >>> python -m apps.mast.main --config apps/mast/qwen3_1_7b_mast.yaml
# >>> python -m .meta.mast.main --config .meta/mast/qwen3_32b_mast.yaml

# Global configuration
group_size: 8
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Group Relative Policy Optimization (GRPO)
# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
# >>> python -m .meta.mast.main --config .meta/mast/qwen3_4b_mast.yaml

# Global configuration
group_size: 8
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Group Relative Policy Optimization (GRPO)
# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
# >>> python -m .meta.mast.main --config .meta/mast/qwen3_8b_mast.yaml

# Global configuration
group_size: 8
Expand Down
Loading