diff --git a/.gitmodules b/.gitmodules
index 831eafc9..8ccedc43 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -11,3 +11,7 @@
 [submodule "mv2/wasm/executorch"]
 	path = mv2/wasm/executorch
 	url = https://github.com/pytorch/executorch.git
+
+[submodule "efficient_sam/wasm/executorch"]
+	path = efficient_sam/wasm/executorch
+	url = https://github.com/pytorch/executorch.git
diff --git a/efficient_sam/wasm/CMakeLists.txt b/efficient_sam/wasm/CMakeLists.txt
new file mode 100644
index 00000000..de1795a1
--- /dev/null
+++ b/efficient_sam/wasm/CMakeLists.txt
@@ -0,0 +1,49 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the LICENSE
+# file in the root directory of this source tree.
+
+# Please keep this file formatted by running:
+# ~~~
+# cmake-format -i CMakeLists.txt
+# ~~~
+
+# build.sh configures this directory directly, so this is a top-level
+# CMakeLists.txt and must declare its own minimum version and project.
+cmake_minimum_required(VERSION 3.13)
+project(efficient_sam_wasm_demo LANGUAGES C CXX)
+
+# Build ExecuTorch from the git submodule checked out next to this file.
+add_subdirectory("executorch")
+
+# Wasm module for the demo page. No sources are listed here; it only links
+# executorch_wasm and executorch_backends.
+add_executable(executorch_wasm_demo_lib)
+target_link_libraries(executorch_wasm_demo_lib PRIVATE executorch_wasm
+                      executorch_backends)
+target_link_options(executorch_wasm_demo_lib PRIVATE -sALLOW_MEMORY_GROWTH
+                    -sSTACK_SIZE=262144 -sENVIRONMENT=web)
+
+# Copy the static page assets into the build directory so that it can be
+# served as-is.
+add_custom_command(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/demo.js
+  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/demo.js
+          ${CMAKE_CURRENT_BINARY_DIR}/demo.js
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/demo.js
+  COMMENT "Copying demo.js to build output directory"
+  VERBATIM)
+
+add_custom_command(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/demo.html
+  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/demo.html
+          ${CMAKE_CURRENT_BINARY_DIR}/demo.html
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/demo.html
+  COMMENT "Copying demo.html to build output directory"
+  VERBATIM)
+
+# Aggregate target built by build.sh: the Wasm module plus both page assets.
+add_custom_target(
+  executorch_wasm_demo
+  DEPENDS executorch_wasm_demo_lib ${CMAKE_CURRENT_BINARY_DIR}/demo.js
+          ${CMAKE_CURRENT_BINARY_DIR}/demo.html)
diff --git a/efficient_sam/wasm/README.md b/efficient_sam/wasm/README.md
new file mode 100644
index 
00000000..05b00a8a --- /dev/null +++ b/efficient_sam/wasm/README.md @@ -0,0 +1,67 @@ +# ExecuTorch JavaScript Bindings Demo + +This demo showcases the capabilities of ExecuTorch's JavaScript bindings. It is able to load a model, run inference, and segment an image natively in the browser. + +## Installing Emscripten + +[Emscripten](https://emscripten.org/index.html) is necessary to compile ExecuTorch for Wasm. You can install Emscripten with these commands: + +```bash +# Clone the emsdk repository +git clone https://github.com/emscripten-core/emsdk.git +cd emsdk + +# Download and install version 4.0.10 of the SDK +./emsdk install 4.0.10 +./emsdk activate 4.0.10 + +# Add the Emscripten environment variables to your shell +source ./emsdk_env.sh +``` + +## Setting up ExecuTorch and Generating the Model File + +Make sure you have the system requirements listed in the [Getting Started Guide](https://docs.pytorch.org/executorch/main/getting-started.html#system-requirements) before continuing. + +1. Install ExecuTorch from PyPI. +```bash +pip3 install executorch +``` + +2. Update the ExecuTorch submodule. +```bash +git submodule update --init --recursive executorch +``` + +3. Generate the EfficientSAM binary file for this demo. + +```bash +bash export.sh +``` +It should output a file called `xnnpack_efficient_sam.pte`. + +## Building and Running + +Once you have Emscripten installed, ExecuTorch set up, and the model file generated, you can build and run the demo. Building may take up to 9 minutes. + +```bash +cd efficient_sam/wasm # The directory containing this README + +# Build the demo +bash build.sh + +# Run the demo +python3 -m http.server --directory build/ +``` + +The page will be available at http://localhost:8000/demo.html. + +## Demo Features + +- Load a model from a file + - This demo only supports the EfficientSAM model. Passing in a model with different input/output shapes will result in an error. 
+- Run inference on an image
+  - Supported formats: `.png`, `.gif`, `.jpeg`, `.jpg`
+- Select a point on the image to run inference
+  - May take around 6.5 seconds to run inference
+- Show and hide the segmentation mask
diff --git a/efficient_sam/wasm/build.sh b/efficient_sam/wasm/build.sh
new file mode 100644
index 00000000..987af23f
--- /dev/null
+++ b/efficient_sam/wasm/build.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Configures and builds the executorch_wasm_demo target with Emscripten.
+# Run from the directory containing this script; output goes to ./build.
+
+# Abort on the first failing command so that a failed configure step does not
+# fall through to the build step.
+set -euo pipefail
+
+CMAKE_OUT=build
+
+emcmake cmake . -DEXECUTORCH_BUILD_WASM=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+  -DEXECUTORCH_BUILD_XNNPACK=ON \
+  -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+  -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+  -DFLATCC_ALLOW_WERROR=OFF \
+  -DCMAKE_BUILD_TYPE=Release \
+  -B"${CMAKE_OUT}"
+
+# Leave one core free, but never ask for fewer than one job.
+if [ "$(uname)" == "Darwin" ]; then
+  NUM_CPUS=$(sysctl -n hw.ncpu)
+else
+  NUM_CPUS=$(nproc)
+fi
+CMAKE_JOBS=$(( NUM_CPUS > 1 ? NUM_CPUS - 1 : 1 ))
+
+cmake --build "${CMAKE_OUT}" --target executorch_wasm_demo -j "${CMAKE_JOBS}"
diff --git a/efficient_sam/wasm/demo.html b/efficient_sam/wasm/demo.html
new file mode 100644
index 00000000..ddb82aa6
--- /dev/null
+++ b/efficient_sam/wasm/demo.html
@@ -0,0 +1,29 @@
+ + + + + + + Executorch Wasm Demo + + + + + + + +

No model uploaded

+
+ + + +
+ + + +
diff --git a/efficient_sam/wasm/demo.js b/efficient_sam/wasm/demo.js
new file mode 100644
index 00000000..2b5735c1
--- /dev/null
+++ b/efficient_sam/wasm/demo.js
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Side length in pixels used for canvas drawing and for the model's image input.
+const DIMS = 1024;
+
+
+// Page elements and canvas contexts; assigned in Module.onRuntimeInitialized.
+let modelButton = null;
+let imageButton = null;
+let inferenceButton = null;
+let maskButton = null;
+let etdumpButton = null;
+let canvasCtx = null;
+let maskCanvas = null;
+let maskCanvasCtx = null;
+let pointerCanvas = null;
+let pointerCanvasCtx = null;
+let modelText = null;
+
+// Once the runtime is initialized, look up the page controls and attach their
+// event handlers.
+var Module = {
+  onRuntimeInitialized: function() {
+    modelButton = document.getElementById("upload_model_button");
+    modelButton.addEventListener("click", openFilePickerModel);
+
+    imageButton = document.getElementById("upload_image_button");
+    imageButton.addEventListener("click", openFilePickerImage);
+
+    inferenceButton = document.getElementById("inference_button");
+    inferenceButton.addEventListener("click", runModel);
+
+    maskButton = document.getElementById("mask_button");
+    maskButton.addEventListener("click", toggleMask);
+
+    const canvas = document.getElementById("canvas");
+    canvasCtx = canvas.getContext("2d", { willReadFrequently: true });
+
+    maskCanvas = document.getElementById("mask_canvas");
+    maskCanvasCtx = maskCanvas.getContext("2d");
+
+    pointerCanvas = document.getElementById("pointer_canvas");
+    pointerCanvas.addEventListener("click", canvasClick);
+    pointerCanvasCtx = pointerCanvas.getContext("2d");
+
+    modelText = document.getElementById("model_text");
+
+    etdumpButton = document.getElementById("etdump_button");
+    etdumpButton.addEventListener("click", etdump);
+  }
+};
+const et = Module;
+
+// Loaded model, current image tensor and last clicked [x, y]; null until set.
+let module = null;
+let imageTensor = null;
+let point = null;
+
+// Download the loaded module's ETDump as "result.etdump".
+function etdump() {
+  if (module == null) {
+    return;
+  }
+
+  // Named `dump` so that it does not shadow this function.
+  const dump = module.etdump();
+  const blob = new Blob([dump.buffer], { type: "application/octet-stream" });
+  const url = URL.createObjectURL(blob);
+  const a = document.createElement("a");
+  a.href = url;
+  a.download = "result.etdump";
+  document.body.appendChild(a);
+  a.click();
+  document.body.removeChild(a);
+  URL.revokeObjectURL(url);
+  dump.delete();
+}
+
+// Toggle the mask overlay and keep the button label in sync.
+function toggleMask(event) {
+  if (maskCanvas.style.display === "none") {
+    maskCanvas.style.display = "block";
+    maskButton.textContent = "Hide Mask";
+  } else {
+    maskCanvas.style.display = "none";
+    maskButton.textContent = "Show Mask";
+  }
+}
+
+// Run the model on the loaded image at the clicked point and draw the mask.
+function runModel(event) {
+  // All three inputs are required; the button can be enabled before an image
+  // has been loaded.
+  if (module == null || imageTensor == null || point == null) {
+    return;
+  }
+
+  const pointTensor = et.Tensor.fromArray([1, 1, 1, 2], point);
+  const labelTensor = et.Tensor.fromArray([1, 1, 1], [1]);
+
+  const startTime = performance.now();
+  console.log("Running model...");
+  const output = module.forward([imageTensor, pointTensor, labelTensor]);
+  const endTime = performance.now();
+  console.log(((endTime - startTime)/1000).toFixed(2) + "s");
+
+  // Index of the largest value in the second output; selects which mask to draw.
+  const argmax = output[1].data.reduce((iMax, elem, i, arr) => elem > arr[iMax] ? i : iMax, 0);
+
+  // Read the mask data once rather than on every pixel of the loop below.
+  const maskData = output[0].data;
+  const imageData = maskCanvasCtx.createImageData(DIMS, DIMS);
+  for (let i = 0; i < DIMS; i++) {
+    for (let j = 0; j < DIMS; j++) {
+      const idx = ((i * DIMS + j) * 4);
+      const idx3 = (argmax * DIMS + i) * DIMS + j;
+      imageData.data[idx + 2] = 255;
+      imageData.data[idx + 3] = Math.min(1, maskData[idx3]) * 100;
+    }
+  }
+  maskCanvasCtx.putImageData(imageData, 0, 0);
+
+  maskCanvas.style.display = "block";
+  maskButton.textContent = "Hide Mask";
+  maskButton.disabled = false;
+  inferenceButton.disabled = true;
+
+  pointTensor.delete();
+  labelTensor.delete();
+  output[0].delete();
+  output[1].delete();
+}
+
+// Record the clicked position and mark it with a circle.
+function canvasClick(event) {
+  if (module == null) {
+    return;
+  }
+
+  const rect = pointerCanvas.getBoundingClientRect();
+  const x = event.clientX - rect.left;
+  const y = event.clientY - rect.top;
+
+  pointerCanvasCtx.beginPath();
+  pointerCanvasCtx.clearRect(0, 0, DIMS, DIMS);
+  pointerCanvasCtx.arc(x, y, 5, 0, 2 * Math.PI);
+  pointerCanvasCtx.stroke();
+
+  console.log("Clicked at: " + x + ", " + y);
+  point = [x, y];
+  inferenceButton.disabled = false;
+}
+
+// Check that `mod` has a "forward" method with the expected input and output
+// tensors. On a mismatch the reason is shown in the model status text and
+// false is returned.
+function verifyModel(mod) {
+  try {
+    mod.loadMethod("forward");
+  } catch (e) {
+    modelText.textContent = "Failed to load forward method: " + e;
+    modelText.style.color = "red";
+    return false;
+  }
+
+  const methodMeta = mod.getMethodMeta("forward");
+  if (methodMeta.inputTags.length != 3) {
+    modelText.textContent = "Error: Expected input size of 3, got " + methodMeta.inputTags.length;
+    modelText.style.color = "red";
+    return false;
+  }
+
+  for (let i = 0; i < 3; i++) {
+    if (methodMeta.inputTags[i] != et.Tag.Tensor) {
+      modelText.textContent = "Error: Expected input " + i + " to be Tensor, got " + methodMeta.inputTags[i].name;
+      modelText.style.color = "red";
+      return false;
+    }
+  }
+
+  const expectedInputSizes = [[1, 3, DIMS, DIMS], [1, 1, 1, 2], [1, 1, 1]];
+  for (let i = 0; i < 3; i++) {
+    const inputMeta = methodMeta.inputTensorMeta[i];
+    if (inputMeta.sizes.length != expectedInputSizes[i].length) {
+      modelText.textContent = "Error: Expected input " + i + " shape to be " + expectedInputSizes[i] + ", got " + inputMeta.sizes;
+      modelText.style.color = "red";
+      return false;
+    }
+
+    for (let j = 0; j < expectedInputSizes[i].length; j++) {
+      if (inputMeta.sizes[j] != expectedInputSizes[i][j]) {
+        modelText.textContent = "Error: Expected input " + i + " shape to be " + expectedInputSizes[i] + ", got " + inputMeta.sizes;
+        modelText.style.color = "red";
+        return false;
+      }
+    }
+
+    if (inputMeta.scalarType != et.ScalarType.Float) {
+      modelText.textContent = "Error: Expected input " + i + " type to be Float, got " + inputMeta.scalarType.name;
+      modelText.style.color = "red";
+      return false;
+    }
+  }
+
+  if (methodMeta.outputTags.length != 2) {
+    modelText.textContent = "Error: Expected output size of 2, got " + methodMeta.outputTags.length;
+    modelText.style.color = "red";
+    return false;
+  }
+
+  for (let i = 0; i < 2; i++) {
+    if (methodMeta.outputTags[i] != et.Tag.Tensor) {
+      modelText.textContent = "Error: Expected output " + i + " to be Tensor, got " + methodMeta.outputTags[i].name;
+      modelText.style.color = "red";
+      return false;
+    }
+  }
+
+  const expectedOutputSizes = [[1, 1, 3, DIMS, DIMS], [1, 1, 3]];
+  for (let i = 0; i < 2; i++) {
+    const outputMeta = methodMeta.outputTensorMeta[i];
+    if (outputMeta.sizes.length != expectedOutputSizes[i].length) {
+      modelText.textContent = "Error: Expected output " + i + " shape to be " + expectedOutputSizes[i] + ", got " + outputMeta.sizes;
+      modelText.style.color = "red";
+      return false;
+    }
+
+    for (let j = 0; j < expectedOutputSizes[i].length; j++) {
+      if (outputMeta.sizes[j] != expectedOutputSizes[i][j]) {
+        modelText.textContent = "Error: Expected output " + i + " shape to be " + expectedOutputSizes[i] + ", got " + outputMeta.sizes;
+        modelText.style.color = "red";
+        return false;
+      }
+    }
+
+    if (outputMeta.scalarType != et.ScalarType.Float) {
+      modelText.textContent = "Error: Expected output " + i + " type to be Float, got " + outputMeta.scalarType.name;
+      modelText.style.color = "red";
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Read `file`, load it as a model and, if it passes verifyModel, make it the
+// active model and enable the image and ETDump buttons.
+function loadModelFile(file) {
+  const reader = new FileReader();
+  reader.onload = function(event) {
+    const buffer = event.target.result;
+
+    const mod = et.Module.load(buffer);
+
+    if (verifyModel(mod)) {
+      if (module != null) {
+        module.delete();
+      }
+      module = mod;
+      modelText.textContent = 'Uploaded model: ' + file.name;
+      modelText.style.color = null;
+      canvasCtx.clearRect(0, 0, DIMS, DIMS);
+      imageButton.disabled = false;
+      etdumpButton.disabled = false;
+    } else {
+      // Release the rejected model; it is not stored anywhere else.
+      mod.delete();
+    }
+  };
+  reader.readAsArrayBuffer(file);
+}
+
+// Yield the R, then G, then B channel of RGBA pixel `data`, each value divided
+// by 255 (alpha is skipped).
+function* generateTensorData(data) {
+  for (let j = 0; j < 3; j++) {
+    for (let i = 0; i < data.length; i += 4) {
+      yield data[i + j] / 255.0;
+    }
+  }
+}
+
+// Draw `file` onto the image canvas, rebuild the image tensor from the canvas
+// pixels, and hide the previous mask.
+function loadImageFile(file) {
+  const img = new Image();
+  const objectUrl = URL.createObjectURL(file);
+  img.onload = function() {
+    // The decoded image no longer needs the object URL.
+    URL.revokeObjectURL(objectUrl);
+    canvasCtx.drawImage(img, 0, 0, DIMS, DIMS);
+    const imageData = canvasCtx.getImageData(0, 0, DIMS, DIMS);
+    if (imageTensor != null) {
+      imageTensor.delete();
+    }
+    imageTensor = et.Tensor.fromIter([1, 3, DIMS, DIMS], generateTensorData(imageData.data));
+
+    maskCanvas.style.display = "none";
+    maskButton.textContent = "Hide Mask";
+    maskButton.disabled = true;
+  };
+  img.src = objectUrl;
+}
+
+// Ask the user for a .pte file and load it as the model.
+async function openFilePickerModel() {
+  try {
+    const [fileHandle] = await window.showOpenFilePicker({
+      types: [{
+        description: 'Model Files',
+        accept: {
+          'application/octet-stream': ['.pte'],
+        },
+      }],
+      multiple: false, // Set to true for multiple file selection
+    });
+    const file = await fileHandle.getFile();
+    loadModelFile(file);
+  } catch (err) {
+    if (err.name === 'AbortError') {
+      // Handle user abort silently
+    } else {
+      console.error('File picker error:', err);
+    }
+  }
+}
+
+// Ask the user for an image file and load it.
+async function openFilePickerImage() {
+  try {
+    const [fileHandle] = await window.showOpenFilePicker({
+      types: [{
+        description: "Images",
+        accept: {
+          "image/*": [".png", ".gif", ".jpeg", ".jpg"],
+        },
+      }],
+      multiple: false, // Set to true for multiple file selection
+    });
+    const file = await fileHandle.getFile();
+    loadImageFile(file);
+  } catch (err) {
+    if (err.name === 'AbortError') {
+      // Handle user abort silently
+    } else {
+      console.error('File picker error:', err);
+    }
+  }
+}
diff --git a/efficient_sam/wasm/executorch b/efficient_sam/wasm/executorch
new file mode 160000
index 00000000..a4b59cc7
--- /dev/null
+++ b/efficient_sam/wasm/executorch
@@ -0,0 +1 @@
+Subproject commit a4b59cc74bcd79b8bf6ceb18e6c6b84823ba4e3e
diff --git a/efficient_sam/wasm/export.sh b/efficient_sam/wasm/export.sh
new file mode 100644
index 00000000..2e64caf8
--- /dev/null
+++ b/efficient_sam/wasm/export.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Exports EfficientSAM, lowered with the XNNPACK partitioner, to
+# xnnpack_efficient_sam.pte next to this script.
+
+# Stop on the first error so a failed cd or export is not silently ignored.
+set -euo pipefail
+
+# Run from the executorch checkout beside this script, regardless of the
+# caller's working directory.
+cd "$(dirname "${BASH_SOURCE[0]}")/executorch"
+
+python3 -c "
+from examples.models.model_factory import EagerModelFactory
+from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
+from executorch.exir import to_edge_transform_and_lower
+from torch.export import export
+
+model, example_inputs, _, _ = EagerModelFactory.create_model(
+    'efficient_sam', 'EfficientSAM'
+)
+
+prog = export(model, example_inputs)
+edge = to_edge_transform_and_lower(prog, partitioner=[XnnpackPartitioner()])
+exec_prog = edge.to_executorch()
+with open('../xnnpack_efficient_sam.pte', 'wb') as file:
+    exec_prog.write_to_file(file)
+"