
Commit 53c49fb

helunwencser authored and facebook-github-bot committed
update doc for phi-3-mini (#5320)
Summary: Update doc for phi-3-mini to reflect the latest state.
Pull Request resolved: #5320
Reviewed By: tarun292
Differential Revision: D62601679
Pulled By: helunwencser
fbshipit-source-id: 3d75151d62494327a0ed6771aa7a468fe4ea7c04
1 parent 523b41e commit 53c49fb


2 files changed (+15, -5 lines)


examples/models/phi-3-mini/README.md

Lines changed: 12 additions & 3 deletions
````diff
@@ -4,9 +4,9 @@ This example demonstrates how to run a [Phi-3-mini](https://huggingface.co/micro
 # Instructions
 ## Step 1: Setup
 1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_requirements.sh --pybind xnnpack`
-2. To export Phi-3-mini, we need this [PR](https://github.com/huggingface/transformers/pull/32339). Install transformers from master with the following command:
+2. Currently, we support transformers v4.44.2. Install transformers with the following command:
 ```
-pip uninstall -y transformers ; pip install git+https://github.com/huggingface/transformers
+pip uninstall -y transformers ; pip install transformers==4.44.2
 ```
 ## Step 2: Prepare and run the model
 1. Download the `tokenizer.model` from HuggingFace and create `tokenizer.bin`.
@@ -53,5 +53,14 @@ cmake --build cmake-out/examples/models/phi-3-mini -j16 --config Release
 ```
 - Run model. Options available [here](https://github.com/pytorch/executorch/blob/main/examples/models/phi-3-mini/main.cpp#L13-L30)
 ```
-cmake-out/examples/models/phi-3-mini/phi_3_mini_runner --model_path=<model pte file> --tokenizer_path=<tokenizer.bin> --seq_len=128 --prompt=<prompt>
+cmake-out/examples/models/phi-3-mini/phi_3_mini_runner \
+    --model_path=phi-3-mini.pte \
+    --tokenizer_path=tokenizer.bin \
+    --seq_len=128 \
+    --temperature=0 \
+    --prompt="<|system|>
+You are a helpful assistant.<|end|>
+<|user|>
+What is the capital of France?<|end|>
+<|assistant|>"
 ```
````
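The new `--prompt` value follows Phi-3-mini's chat template: each turn is wrapped in a role tag and closed with `<|end|>`, and generation continues after the trailing `<|assistant|>` tag. A minimal C++ sketch of assembling such a prompt string (the `build_phi3_prompt` helper is hypothetical, for illustration only, and is not part of this commit or the runner):

```cpp
// Hypothetical helper: builds a Phi-3-mini chat-style prompt like the one
// passed via --prompt in the README example above.
#include <iostream>
#include <string>

std::string build_phi3_prompt(const std::string& system_msg,
                              const std::string& user_msg) {
  // Each turn is wrapped in a role tag and closed with <|end|>; the model
  // generates its reply after the final <|assistant|> tag.
  return "<|system|>\n" + system_msg + "<|end|>\n" +
         "<|user|>\n" + user_msg + "<|end|>\n" +
         "<|assistant|>";
}

int main() {
  std::cout << build_phi3_prompt("You are a helpful assistant.",
                                 "What is the capital of France?")
            << std::endl;
  return 0;
}
```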

examples/models/phi-3-mini/runner.cpp

Lines changed: 3 additions & 2 deletions
```diff
@@ -81,7 +81,7 @@ uint64_t Runner::logits_to_token(const exec_aten::Tensor& logits_tensor) {
 }
 
 uint64_t Runner::prefill(std::vector<uint64_t>& tokens) {
-  auto result = module_->forward(from_blob(
+  auto result = module_->forward(executorch::extension::from_blob(
       tokens.data(),
       {1, static_cast<exec_aten::SizesType>(tokens.size())},
       ScalarType::Long));
@@ -91,7 +91,8 @@ uint64_t Runner::prefill(std::vector<uint64_t>& tokens) {
 }
 
 uint64_t Runner::run_model_step(uint64_t token) {
-  auto result = module_->forward(from_blob(&token, {1, 1}, ScalarType::Long));
+  auto result = module_->forward(
+      executorch::extension::from_blob(&token, {1, 1}, ScalarType::Long));
   ET_CHECK_MSG(
       result.error() == Error::Ok,
       "Failed to run forward() for token %" PRIu64,
```
