Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 5e21fff

Browse files
authored
Merge branch 'main' into patch-35
2 parents 053058d + 9686c79 commit 5e21fff

File tree

7 files changed

+208
-33
lines changed

7 files changed

+208
-33
lines changed

.github/workflows/run-readme-pr-macos.yml

Lines changed: 111 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ jobs:
3333
sysctl machdep.cpu.core_count
3434
echo "::endgroup::"
3535
36-
.ci/scripts/run-docs readme
36+
echo "using workaround for #1416 and #1315 by setting torchchat device explicitly"
37+
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs readme
3738
3839
echo "::group::Completion"
3940
echo "tests complete"
@@ -68,7 +69,8 @@ jobs:
6869
sysctl machdep.cpu.core_count
6970
echo "::endgroup::"
7071
71-
.ci/scripts/run-docs quantization
72+
echo "using workaround for #1416 and #1315 by setting torchchat device explicitly"
73+
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization
7274
7375
echo "::group::Completion"
7476
echo "tests complete"
@@ -103,7 +105,8 @@ jobs:
103105
sysctl machdep.cpu.core_count
104106
echo "::endgroup::"
105107
106-
.ci/scripts/run-docs gguf
108+
echo "using workaround for #1416 and #1315 by setting torchchat device explicitly"
109+
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs gguf
107110
108111
echo "::group::Completion"
109112
echo "tests complete"
@@ -137,9 +140,113 @@ jobs:
137140
sysctl machdep.cpu.core_count
138141
echo "::endgroup::"
139142
140-
.ci/scripts/run-docs advanced
143+
echo "using workaround for #1416 and #1315 by setting torchchat device explicitly"
144+
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs advanced
141145
142146
echo "::group::Completion"
143147
echo "tests complete"
144148
echo "*******************************************"
145149
echo "::endgroup::"
150+
151+
test-eval-macos:
152+
runs-on: macos-14-xlarge
153+
steps:
154+
- name: Checkout code
155+
uses: actions/checkout@v2
156+
- uses: actions/setup-python@v4
157+
with:
158+
python-version: '3.10.11'
159+
- name: Setup Xcode
160+
if: runner.os == 'macOS'
161+
uses: maxim-lobanov/setup-xcode@v1
162+
with:
163+
xcode-version: '15.3'
164+
- name: Run script
165+
run: |
166+
set -x
167+
# NS: Remove previous installation of torch first
168+
# as this script does not install anything into conda env but rather as system dep
169+
pip3 uninstall -y torch || true
170+
set -eou pipefail
171+
172+
echo "::group::Print machine info"
173+
uname -a
174+
sysctl machdep.cpu.brand_string
175+
sysctl machdep.cpu.core_count
176+
echo "::endgroup::"
177+
178+
.ci/scripts/run-docs evaluation
179+
180+
echo "::group::Completion"
181+
echo "tests complete"
182+
echo "*******************************************"
183+
echo "::endgroup::"
184+
185+
test-multimodal-macos:
186+
runs-on: macos-14-xlarge
187+
steps:
188+
- name: Checkout code
189+
uses: actions/checkout@v2
190+
- uses: actions/setup-python@v4
191+
with:
192+
python-version: '3.10.11'
193+
- name: Setup Xcode
194+
if: runner.os == 'macOS'
195+
uses: maxim-lobanov/setup-xcode@v1
196+
with:
197+
xcode-version: '15.3'
198+
- name: Run script
199+
run: |
200+
set -x
201+
# NS: Remove previous installation of torch first
202+
# as this script does not install anything into conda env but rather as system dep
203+
pip3 uninstall -y torch || true
204+
set -eou pipefail
205+
206+
echo "::group::Print machine info"
207+
uname -a
208+
sysctl machdep.cpu.brand_string
209+
sysctl machdep.cpu.core_count
210+
echo "::endgroup::"
211+
212+
.ci/scripts/run-docs multimodal
213+
214+
echo "::group::Completion"
215+
echo "tests complete"
216+
echo "*******************************************"
217+
echo "::endgroup::"
218+
219+
test-native-macos:
220+
runs-on: macos-14-xlarge
221+
steps:
222+
- name: Checkout code
223+
uses: actions/checkout@v2
224+
- uses: actions/setup-python@v4
225+
with:
226+
python-version: '3.10.11'
227+
- name: Setup Xcode
228+
if: runner.os == 'macOS'
229+
uses: maxim-lobanov/setup-xcode@v1
230+
with:
231+
xcode-version: '15.3'
232+
- name: Run script
233+
run: |
234+
set -x
235+
# NS: Remove previous installation of torch first
236+
# as this script does not install anything into conda env but rather as system dep
237+
pip3 uninstall -y torch || true
238+
set -eou pipefail
239+
240+
echo "::group::Print machine info"
241+
uname -a
242+
sysctl machdep.cpu.brand_string
243+
sysctl machdep.cpu.core_count
244+
echo "::endgroup::"
245+
246+
.ci/scripts/run-docs native
247+
248+
echo "::group::Completion"
249+
echo "tests complete"
250+
echo "*******************************************"
251+
echo "::endgroup::"
252+

.github/workflows/run-readme-pr-mps.yml

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,84 @@ jobs:
113113
echo "tests complete"
114114
echo "*******************************************"
115115
echo "::endgroup::"
116+
117+
test-evaluation-mps-macos:
118+
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
119+
with:
120+
runner: macos-m1-14 # needs MPS, was macos-m1-stable
121+
script: |
122+
set -x
123+
conda create -y -n test-evaluation-mps-macos python=3.10.11
124+
conda activate test-evaluation-mps-macos
125+
# NS: Remove previous installation of torch first
126+
# as this script does not install anything into conda env
127+
# but rather system dep
128+
pip3 uninstall -y torch || true
129+
set -eou pipefail
130+
131+
echo "::group::Print machine info"
132+
uname -a
133+
sysctl machdep.cpu.brand_string
134+
sysctl machdep.cpu.core_count
135+
echo "::endgroup::"
136+
137+
.ci/scripts/run-docs evaluation
138+
139+
echo "::group::Completion"
140+
echo "tests complete"
141+
echo "*******************************************"
142+
echo "::endgroup::"
143+
144+
test-multimodal-mps-macos:
145+
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
146+
with:
147+
runner: macos-m1-14 # needs MPS, was macos-m1-stable
148+
script: |
149+
set -x
150+
conda create -y -n test-multimodal-mps-macos python=3.10.11
151+
conda activate test-multimodal-mps-macos
152+
# NS: Remove previous installation of torch first
153+
# as this script does not install anything into conda env
154+
# but rather system dep
155+
pip3 uninstall -y torch || true
156+
set -eou pipefail
157+
158+
echo "::group::Print machine info"
159+
uname -a
160+
sysctl machdep.cpu.brand_string
161+
sysctl machdep.cpu.core_count
162+
echo "::endgroup::"
163+
164+
.ci/scripts/run-docs multimodal
165+
166+
echo "::group::Completion"
167+
echo "tests complete"
168+
echo "*******************************************"
169+
echo "::endgroup::"
170+
171+
test-native-mps-macos:
172+
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
173+
with:
174+
runner: macos-m1-14 # needs MPS, was macos-m1-stable
175+
script: |
176+
set -x
177+
conda create -y -n test-native-mps-macos python=3.10.11
178+
conda activate test-native-mps-macos
179+
# NS: Remove previous installation of torch first
180+
# as this script does not install anything into conda env
181+
# but rather system dep
182+
pip3 uninstall -y torch || true
183+
set -eou pipefail
184+
185+
echo "::group::Print machine info"
186+
uname -a
187+
sysctl machdep.cpu.brand_string
188+
sysctl machdep.cpu.core_count
189+
echo "::endgroup::"
190+
191+
.ci/scripts/run-docs native
192+
193+
echo "::group::Completion"
194+
echo "tests complete"
195+
echo "*******************************************"
196+
echo "::endgroup::"

install/install_requirements.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,13 @@ echo "Using pip executable: $PIP_EXECUTABLE"
5151
# NOTE: If a newly-fetched version of the executorch repo changes the value of
5252
# PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
5353
# package versions.
54-
PYTORCH_NIGHTLY_VERSION=dev20250119
54+
PYTORCH_NIGHTLY_VERSION=dev20250124
5555

5656
# Nightly version for torchvision
57-
VISION_NIGHTLY_VERSION=dev20250119
57+
VISION_NIGHTLY_VERSION=dev20250124
5858

5959
# Nightly version for torchtune
60-
TUNE_NIGHTLY_VERSION=dev20250119
60+
TUNE_NIGHTLY_VERSION=dev20250124
6161

6262
# The pip repository that hosts nightly torch packages. cpu by default.
6363
# If cuda is available, based on presence of nvidia-smi, install the pytorch nightly

install/requirements.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ jinja2
1616
# Miscellaneous
1717
snakeviz
1818
sentencepiece
19-
# numpy version range required by GGUF util
20-
numpy >= 1.17, < 2.0
19+
numpy >= 1.17
2120
blobfile
2221
tomli >= 1.1.0 ; python_version < "3.11"
2322
openai

torchchat/cli/builder.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -589,9 +589,8 @@ def do_nothing(max_batch_size, max_seq_length):
589589
# attributes will NOT be seen on by AOTI-compiled forward
590590
# function, e.g. calling model.setup_cache will NOT touch
591591
# AOTI compiled and maintained model buffers such as kv_cache.
592-
from torch._inductor.package import load_package
593592

594-
aoti_compiled_model = load_package(
593+
aoti_compiled_model = torch._inductor.aoti_load_package(
595594
str(builder_args.aoti_package_path.absolute())
596595
)
597596

torchchat/export.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,17 +75,20 @@ def export_for_server(
7575
if not package:
7676
options = {"aot_inductor.output_path": output_path}
7777

78-
path = torch._export.aot_compile(
78+
ep = torch.export.export(
7979
model,
8080
example_inputs,
8181
dynamic_shapes=dynamic_shapes,
82-
options=options,
8382
)
8483

8584
if package:
86-
from torch._inductor.package import package_aoti
87-
88-
path = package_aoti(output_path, path)
85+
path = torch._inductor.aoti_compile_and_package(
86+
ep, package_path=output_path, inductor_configs=options
87+
)
88+
else:
89+
path = torch._inductor.aot_compile(
90+
ep.module(), example_inputs, options=options
91+
)
8992

9093
print(f"The generated packaged model can be found at: {path}")
9194
return path

torchchat/model.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,7 @@ def __init__(self, config: TransformerArgs) -> None:
657657
self.layers[str(layer_id)] = TransformerBlock(config)
658658

659659
if config.stage_idx == config.n_stages - 1:
660-
self.norm = RMSNorm(config.dim, eps=config.norm_eps)
660+
self.norm = nn.RMSNorm(config.dim, eps=config.norm_eps)
661661
self.output = nn.Linear(config.dim, config.vocab_size, bias=False)
662662
if config.tie_word_embeddings:
663663
self.output.weight = self.tok_embeddings.weight
@@ -751,8 +751,8 @@ def __init__(self, config: TransformerArgs) -> None:
751751
super().__init__()
752752
self.attention = Attention(config)
753753
self.feed_forward = FeedForward(config)
754-
self.ffn_norm = RMSNorm(config.dim, config.norm_eps)
755-
self.attention_norm = RMSNorm(config.dim, config.norm_eps)
754+
self.ffn_norm = nn.RMSNorm(config.dim, config.norm_eps)
755+
self.attention_norm = nn.RMSNorm(config.dim, config.norm_eps)
756756
# None for llama architecture, set for granite architectures
757757
self.residual_multiplier = (
758758
config.residual_multiplier
@@ -928,20 +928,6 @@ def forward(self, x: Tensor) -> Tensor:
928928
return self.w2(F.silu(self.w1(x)) * self.w3(x))
929929

930930

931-
class RMSNorm(nn.Module):
932-
def __init__(self, dim: int, eps: float = 1e-5):
933-
super().__init__()
934-
self.eps = eps
935-
self.weight = nn.Parameter(torch.ones(dim))
936-
937-
def _norm(self, x):
938-
return x * torch.rsqrt(torch.mean(x * x, dim=-1, keepdim=True) + self.eps)
939-
940-
def forward(self, x: Tensor) -> Tensor:
941-
output = self._norm(x.float()).type_as(x)
942-
return output * self.weight
943-
944-
945931
def apply_scaling(freqs: torch.Tensor, rope_scaling: Dict[str, Any]):
946932
# Check for the presence of the required keys
947933
required_keys = {

0 commit comments

Comments
 (0)