|
| 1 | +#!/bin/bash |
| 2 | + |
| 3 | +set -euo pipefail |
| 4 | + |
# Configuration.
# These values are fixed for the lifetime of the script, so they are marked
# readonly: an accidental reassignment further down fails loudly instead of
# silently changing what gets cloned or built.
readonly REPO_URL="https://github.com/huggingface/xet-core.git"
readonly DEFAULT_BRANCH="main"
readonly DEFAULT_PACKAGE="hf_xet_thin_wasm"
readonly DEFAULT_JS_TARGET="web"
# Relative path: the clone is created under the directory the script is run from.
readonly CLONE_DIR="xet-core-wasm-build"

# Colors for output.
# Stored as literal backslash sequences; they only become escape codes when
# printed with `echo -e` or `printf '%b'`.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color
| 18 | + |
# Print an informational message to stdout, prefixed with a green [INFO] tag.
# Arguments: $* - message text. It is printed verbatim: only the color
#                 variables go through %b, so backslashes inside the message
#                 (for example a path such as C:\new\dist) are not turned
#                 into escape characters.
log() {
  printf '%b[INFO]%b %s\n' "${GREEN-}" "${NC-}" "$*"
}
| 23 | + |
# Print a warning to stderr, prefixed with a yellow [WARN] tag.
# Warnings are diagnostics, so they go to stderr and stay out of any stdout
# that a caller captures or pipes.
# Arguments: $* - message text, printed verbatim (no backslash interpretation).
warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW-}" "${NC-}" "$*" >&2
}
| 27 | + |
# Print a fatal error to stderr, prefixed with a red [ERROR] tag, then
# terminate the script with exit status 1.
# Arguments: $* - message text, printed verbatim (no backslash interpretation).
error() {
  printf '%b[ERROR]%b %s\n' "${RED-}" "${NC-}" "$*" >&2
  exit 1
}
| 32 | + |
# Report whether "$1" names something the shell can run.
# Arguments: $1 - command name to look up
# Returns:   0 when `command -v` resolves the name, 1 otherwise.
# Outputs:   nothing; both stdout and stderr of the lookup are discarded.
command_exists() {
  local candidate=$1
  if command -v "$candidate" &>/dev/null; then
    return 0
  fi
  return 1
}
| 37 | + |
# Print the usage text (options, examples, requirements) to stdout.
# The script name and the default branch/package/target are substituted
# into the text at call time.
show_help() {
  local prog=$0
  printf '%s\n' \
    "Usage: $prog [OPTIONS]" \
    "" \
    "Build WASM packages from xet-core repository." \
    "" \
    "OPTIONS:" \
    " -b, --branch BRANCH Git branch to checkout (default: $DEFAULT_BRANCH)" \
    " -p, --package PACKAGE WASM package to build: hf_xet_thin_wasm or hf_xet_wasm (default: $DEFAULT_PACKAGE)" \
    " -t, --target TARGET JavaScript target: web, nodejs, bundler, no-modules, deno (default: $DEFAULT_JS_TARGET)" \
    " -o, --output DIR Output directory to copy built WASM files" \
    " -c, --clean Clean clone directory before starting" \
    " -h, --help Show this help message" \
    "" \
    "EXAMPLES:" \
    " $prog # Build hf_xet_thin_wasm from main branch" \
    " $prog -b feature-branch # Build from specific branch" \
    " $prog -p hf_xet_wasm # Build the full WASM package" \
    " $prog -o ./my-project/wasm # Copy output to specific directory" \
    " $prog -t nodejs -o ./dist # Build for Node.js and copy to dist" \
    "" \
    "REQUIREMENTS:" \
    " - Git" \
    " - Rust (will install nightly toolchain automatically)" \
    " - Internet connection for downloading dependencies" \
    ""
}
| 67 | + |
# Parse command line arguments
BRANCH="$DEFAULT_BRANCH"
PACKAGE="$DEFAULT_PACKAGE"
JS_TARGET="$DEFAULT_JS_TARGET"
OUTPUT_DIR=""
CLEAN=false
# Remembered so later steps can get back to where the script was started.
ORIGINAL_DIR=$(pwd)

# Abort with a readable message unless an option is followed by a value.
# Without this check, `set -u` turns a trailing `-b` into a bare
# "unbound variable" error on $2.
# Arguments: $1 - the option as typed, $2 - number of arguments left ($#)
require_option_value() {
  if [[ "$2" -lt 2 ]]; then
    error "Option $1 requires an argument. Use -h for help."
  fi
}

# Fill BRANCH, PACKAGE, JS_TARGET, OUTPUT_DIR and CLEAN from the given
# arguments. Exits 0 after printing help for -h/--help; exits 1 (via error)
# on an unknown option, a missing option value, or an invalid package name.
# Arguments: "$@" - the script's command line
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -b|--branch)
        require_option_value "$1" "$#"
        BRANCH="$2"
        shift 2
        ;;
      -p|--package)
        require_option_value "$1" "$#"
        PACKAGE="$2"
        if [[ "$PACKAGE" != "hf_xet_thin_wasm" && "$PACKAGE" != "hf_xet_wasm" ]]; then
          error "Invalid package: $PACKAGE. Must be 'hf_xet_thin_wasm' or 'hf_xet_wasm'"
        fi
        shift 2
        ;;
      -t|--target)
        require_option_value "$1" "$#"
        JS_TARGET="$2"
        shift 2
        ;;
      -o|--output)
        require_option_value "$1" "$#"
        OUTPUT_DIR="$2"
        shift 2
        ;;
      -c|--clean)
        CLEAN=true
        shift
        ;;
      -h|--help)
        show_help
        exit 0
        ;;
      *)
        error "Unknown option: $1. Use -h for help."
        ;;
    esac
  done
}

parse_args "$@"
| 110 | + |
# Check prerequisites.
# Everything the script itself invokes later is verified here, so a missing
# tool is reported before any cloning or building starts rather than
# part-way through.
log "Checking prerequisites..."

if ! command_exists git; then
  error "Git is not installed. Please install Git first."
fi

if ! command_exists rustup; then
  error "Rustup is not installed. Please install Rust from https://rustup.rs/"
fi

# cargo is called directly to install wasm-pack / wasm-bindgen-cli.
if ! command_exists cargo; then
  error "Cargo is not installed. Please install Rust from https://rustup.rs/"
fi

# base64 is needed by the final step that embeds the .wasm file in a
# TypeScript module.
if ! command_exists base64; then
  error "base64 is not installed. Please install it (it is part of coreutils on most systems)."
fi
| 121 | + |
# Clean previous build if requested
if [[ "$CLEAN" == true && -d "$CLONE_DIR" ]]; then
  log "Cleaning previous build directory: $CLONE_DIR"
  # ${CLONE_DIR:?} aborts instead of expanding to an empty path; `--` stops
  # a name starting with '-' from being read as an option.
  rm -rf -- "${CLONE_DIR:?}"
fi

# Clone the repository, or refresh an existing clone
if [[ -d "$CLONE_DIR" ]]; then
  log "Directory $CLONE_DIR already exists. Using existing clone."
  cd "$CLONE_DIR"
  log "Fetching latest changes..."
  # The clone below is created with --depth=1 --branch, which makes it a
  # single-branch clone: a plain `git fetch origin` only updates the branch
  # it was originally cloned with, so asking for a different branch on a
  # later run would fail. Fetch the requested ref explicitly and check out
  # exactly what was fetched.
  git fetch --depth=1 origin "$BRANCH"
  # --force discards local modifications, like the `git reset --hard` this
  # replaces; -B creates the local branch or resets it if it already exists.
  git checkout --force -B "$BRANCH" FETCH_HEAD
else
  log "Cloning xet-core repository (branch: $BRANCH, depth: 1)..."
  git clone --depth=1 --branch="$BRANCH" "$REPO_URL" "$CLONE_DIR"
  cd "$CLONE_DIR"
fi

log "Repository cloned successfully. Current directory: $(pwd)"
| 143 | + |
# Install required Rust toolchain and components
log "Setting up Rust toolchain..."

# The explicit nightly toolchain setup below is currently disabled.
# # Install nightly toolchain
# log "Installing Rust nightly toolchain..."
# rustup toolchain install nightly

# # Add WASM target
# log "Adding wasm32-unknown-unknown target..."
# rustup target add wasm32-unknown-unknown --toolchain nightly

# # Add rust-src component for nightly
# log "Adding rust-src component..."
# rustup component add rust-src --toolchain nightly

# Install required tools
log "Installing wasm-pack and wasm-bindgen-cli..."
# Each entry is "<binary looked up on PATH>:<crate passed to cargo install>".
for tool_spec in "wasm-pack:wasm-pack" "wasm-bindgen:wasm-bindgen-cli"; do
  tool_bin=${tool_spec%%:*}
  tool_crate=${tool_spec#*:}
  if command_exists "$tool_bin"; then
    log "$tool_crate already installed"
  else
    cargo install "$tool_crate"
  fi
done
unset tool_spec tool_bin tool_crate
| 172 | + |
# Change to the package directory
log "Building WASM package: $PACKAGE"
# Fail with a clear message if the checked-out branch has no such package.
if [[ ! -d "$PACKAGE" ]]; then
  error "Package directory not found in repository: $PACKAGE"
fi
cd "$PACKAGE"

# Export the JS target into the environment of the build script.
# NOTE(review): presumably build_wasm.sh reads JS_TARGET - confirm upstream.
export JS_TARGET

# Build the WASM package
log "Starting WASM build (target: $JS_TARGET)..."
if [[ ! -f build_wasm.sh ]]; then
  error "build_wasm.sh not found in $(pwd)"
fi
# Both packages are built by running the build_wasm.sh in their own
# directory. The previous per-package if/else ran the same two commands in
# each branch, so it is collapsed into one path.
chmod +x build_wasm.sh
./build_wasm.sh

log "WASM build completed successfully!"
| 193 | + |
# Print an output path resolved against a base directory.
# Arguments: $1 - base directory, $2 - path as given by the user
# Outputs:   $2 unchanged when it is absolute, otherwise "$1/$2"
resolve_output_path() {
  case "$2" in
    /*) printf '%s\n' "$2" ;;
    *) printf '%s\n' "$1/$2" ;;
  esac
}

# Check if pkg directory exists (created by wasm-pack)
if [[ -d "pkg" ]]; then
  log "Generated files in pkg directory:"
  ls -la pkg/

  # Copy to output directory if specified
  if [[ -n "$OUTPUT_DIR" ]]; then
    # The working directory is now the package directory inside the clone.
    # A relative --output must be resolved against the directory the script
    # was started from, otherwise `-o ./dist` would land inside the clone.
    output_dest=$(resolve_output_path "$ORIGINAL_DIR" "$OUTPUT_DIR")
    log "Copying WASM files to output directory: $OUTPUT_DIR"
    mkdir -p -- "$output_dest"
    cp -r pkg/* "$output_dest/"
    log "Files copied to $OUTPUT_DIR"
    log "Contents of output directory:"
    ls -la "$output_dest"
  fi
else
  warn "pkg directory not found. Build may have failed or used different output location."
fi
| 211 | + |
# Go back to the directory the script was started from, then print a short
# summary of what was built.
cd "$ORIGINAL_DIR"

for summary_line in \
  "Build process completed!" \
  "Built package: $PACKAGE" \
  "Branch: $BRANCH" \
  "JavaScript target: $JS_TARGET"; do
  log "$summary_line"
done
unset summary_line

# The output line is only shown when an output directory was requested.
[[ -z "$OUTPUT_DIR" ]] || log "Output copied to: $OUTPUT_DIR"
| 222 | + |
# Write a TypeScript module that embeds a binary file as base64 and exports
# it both as `wasmBase64` (the atob-decoded binary string) and as
# `wasmBinary` (a Uint8Array of the same bytes).
# Arguments: $1 - path of the binary (.wasm) file to embed
#            $2 - path of the TypeScript file to (over)write
write_wasm_base64_module() {
  local wasm_file=$1
  local ts_file=$2
  {
    printf '%s\n' "// Generated by build-xet-wasm.sh"
    printf '%s\n' 'export const wasmBase64 = atob(`'
    # Read via stdin and strip the encoder's own line breaks before folding,
    # so the generated file is wrapped at 100 columns whatever the local
    # base64 implementation's default wrapping is.
    base64 < "$wasm_file" | tr -d '\r\n' | fold -w 100
    printf '\n'
    # trim()/replaceAll() must run on the base64 text, inside the atob()
    # call. Applied to the decoded result they would delete every 0x0A byte
    # from the binary.
    cat << 'EOF'
`.trim().replaceAll("\n", ""));
const wasmBinary = new Uint8Array(wasmBase64.length);
for (let i = 0; i < wasmBase64.length; i++) {
  wasmBinary[i] = wasmBase64.charCodeAt(i);
}
export { wasmBinary };
EOF
  } > "$ts_file"
}

# Copy the generated hf_xet_thin_wasm_bg.js and hf_xet_thin_wasm_bg.wasm
# into the hub package (paths are relative to the directory the script is
# run from). The file names are specific to the thin package, so this step
# is skipped for any other package instead of failing after a successful
# build.
# NOTE(review): assumes src/vendor/xet-chunk is only meant to hold the thin
# build - confirm.
if [[ "${PACKAGE}" == "hf_xet_thin_wasm" ]]; then
  cp "$CLONE_DIR/$PACKAGE/pkg/hf_xet_thin_wasm_bg.js" "./src/vendor/xet-chunk/chunker_wasm_bg.js"
  write_wasm_base64_module \
    "$CLONE_DIR/$PACKAGE/pkg/hf_xet_thin_wasm_bg.wasm" \
    "./src/vendor/xet-chunk/chunker_wasm_bg.wasm.base64.ts"
else
  warn "Skipping update of src/vendor/xet-chunk: it is only refreshed from hf_xet_thin_wasm builds."
fi

echo -e "\n${GREEN}🎉 Success!${NC} Your WASM package is ready to use."
0 commit comments