Skip to content

Commit ccb6a2e

Browse files
committed
Build thin WASM from xet-core directly
1 parent dcf6188 commit ccb6a2e

File tree

5 files changed

+1915
-2359
lines changed

5 files changed

+1915
-2359
lines changed

packages/hub/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
xet-core-wasm-build

packages/hub/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@
4141
"prepare": "pnpm run build",
4242
"test": "vitest run",
4343
"test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest-browser.config.mts",
44-
"check": "tsc"
44+
"check": "tsc",
45+
"build:xet-wasm": "./scripts/build-xet-wasm.sh -t bundler -c -b hoytak/250714-eliminate-mdb-v1"
4546
},
4647
"files": [
4748
"src",
packages/hub/scripts/build-xet-wasm.sh

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
#!/bin/bash
2+
3+
set -euo pipefail
4+
5+
# Configuration
6+
# Upstream repository and build defaults. These are marked readonly so that
# later code cannot overwrite them by accident; the per-run values live in
# BRANCH, PACKAGE and JS_TARGET.
readonly REPO_URL="https://github.com/huggingface/xet-core.git"
readonly DEFAULT_BRANCH="main"
readonly DEFAULT_PACKAGE="hf_xet_thin_wasm"
readonly DEFAULT_JS_TARGET="web"
# Directory, relative to where the script is started, that holds the clone.
readonly CLONE_DIR="xet-core-wasm-build"

# Colors for output
# The escape sequences are stored as literal backslash strings and are only
# interpreted when printed with `echo -e`.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m' # currently not referenced by this script
readonly NC='\033[0m' # No Color
18+
19+
# Function to print colored output
20+
# Print an informational message on stdout, prefixed with a green [INFO] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
log() {
  local message=$1
  printf '%b\n' "${GREEN}[INFO]${NC} ${message}"
}
23+
24+
# Print a warning, prefixed with a yellow [WARN] tag.
# The message goes to stderr so that warnings stay visible (and separate)
# when the script's stdout is piped or captured.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
warn() {
  echo -e "${YELLOW}[WARN]${NC} $1" >&2
}
27+
28+
# Print a fatal error, prefixed with a red [ERROR] tag, and terminate the
# script with exit status 1.
# The message is written to stderr: on stdout it would be swallowed or mixed
# into captured output whenever the caller redirects or pipes the script.
# Globals:   RED, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
  exit 1
}
32+
33+
# Function to check if a command exists
34+
# Succeed when the shell can resolve $1 as a command; print nothing.
# Arguments: $1 - command name to look up
# Returns:   0 if `command -v` finds it, non-zero otherwise
command_exists() {
  local candidate=$1
  command -v "$candidate" &> /dev/null
}
37+
38+
# Help function
39+
# Print the usage text on stdout.
# Globals: DEFAULT_BRANCH, DEFAULT_PACKAGE, DEFAULT_JS_TARGET (read)
# The here-doc delimiter is unquoted on purpose so that $0 and the defaults
# are expanded into the text.
show_help() {
  cat << EOF
Usage: $0 [OPTIONS]

Build WASM packages from xet-core repository.

OPTIONS:
    -b, --branch BRANCH     Git branch to checkout (default: $DEFAULT_BRANCH)
    -p, --package PACKAGE   WASM package to build: hf_xet_thin_wasm or hf_xet_wasm (default: $DEFAULT_PACKAGE)
    -t, --target TARGET     JavaScript target: web, nodejs, bundler, no-modules, deno (default: $DEFAULT_JS_TARGET)
    -o, --output DIR        Output directory to copy built WASM files
    -c, --clean             Clean clone directory before starting
    -h, --help              Show this help message

EXAMPLES:
    $0                              # Build hf_xet_thin_wasm from main branch
    $0 -b feature-branch            # Build from specific branch
    $0 -p hf_xet_wasm               # Build the full WASM package
    $0 -o ./my-project/wasm         # Copy output to specific directory
    $0 -t nodejs -o ./dist          # Build for Node.js and copy to dist

REQUIREMENTS:
    - Git
    - Rust installed through rustup, with cargo on PATH
      (wasm-pack and wasm-bindgen-cli are installed via cargo when missing)
    - Internet connection for downloading dependencies

NOTES:
    Run this script from the hub package root: after the build, the files
    under ./src/vendor/xet-chunk/ are regenerated from the hf_xet_thin_wasm
    build output.

EOF
}
67+
68+
# Parse command line arguments
69+
# Effective settings for this run. Each starts at its default and may be
# overridden by a command line option.
BRANCH="$DEFAULT_BRANCH"
PACKAGE="$DEFAULT_PACKAGE"
JS_TARGET="$DEFAULT_JS_TARGET"
OUTPUT_DIR=""
CLEAN=false
# Remembered so the script can return here after it has cd'ed into the clone.
ORIGINAL_DIR=$(pwd)

# Parse the command line into BRANCH, PACKAGE, JS_TARGET, OUTPUT_DIR and CLEAN.
# Globals:   the five variables above (written)
# Arguments: the script's positional parameters
# Exits:     0 after printing help for -h/--help; 1 (via error) on an unknown
#            option, an invalid package name, or an option missing its value
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -b|--branch)
        # Without this check a trailing "-b" dies with an opaque
        # "unbound variable" message under `set -u`.
        [[ $# -ge 2 ]] || error "Option $1 requires an argument. Use -h for help."
        BRANCH="$2"
        shift 2
        ;;
      -p|--package)
        [[ $# -ge 2 ]] || error "Option $1 requires an argument. Use -h for help."
        PACKAGE="$2"
        if [[ "$PACKAGE" != "hf_xet_thin_wasm" && "$PACKAGE" != "hf_xet_wasm" ]]; then
          error "Invalid package: $PACKAGE. Must be 'hf_xet_thin_wasm' or 'hf_xet_wasm'"
        fi
        shift 2
        ;;
      -t|--target)
        [[ $# -ge 2 ]] || error "Option $1 requires an argument. Use -h for help."
        JS_TARGET="$2"
        shift 2
        ;;
      -o|--output)
        [[ $# -ge 2 ]] || error "Option $1 requires an argument. Use -h for help."
        OUTPUT_DIR="$2"
        shift 2
        ;;
      -c|--clean)
        CLEAN=true
        shift
        ;;
      -h|--help)
        show_help
        exit 0
        ;;
      *)
        error "Unknown option: $1. Use -h for help."
        ;;
    esac
  done
}

parse_args "$@"
110+
111+
# Check prerequisites
112+
log "Checking prerequisites..."

if ! command_exists git; then
  error "Git is not installed. Please install Git first."
fi

if ! command_exists rustup; then
  error "Rustup is not installed. Please install Rust from https://rustup.rs/"
fi

# cargo is invoked directly further down to install wasm-pack and
# wasm-bindgen-cli, so verify it as well instead of assuming rustup put it
# on PATH.
if ! command_exists cargo; then
  error "Cargo is not installed or not on PATH. Please install Rust from https://rustup.rs/"
fi

# These are only needed at the very end, when the .wasm file is embedded as
# base64. Checking now avoids failing after the whole build has already run.
for required_tool in base64 fold; do
  if ! command_exists "$required_tool"; then
    error "Required tool '$required_tool' is not installed."
  fi
done
121+
122+
# Clean previous build if requested
123+
# Only runs when -c/--clean was given and a previous clone exists.
if [[ "$CLEAN" == true && -d "$CLONE_DIR" ]]; then
  log "Cleaning previous build directory: $CLONE_DIR"
  # `:?` aborts instead of expanding to an empty path, and `--` stops a
  # value starting with '-' from being read as an option to rm.
  rm -rf -- "${CLONE_DIR:?}"
fi
127+
128+
# Clone the repository
129+
if [[ -d "$CLONE_DIR" ]]; then
  log "Directory $CLONE_DIR already exists. Using existing clone."
  cd "$CLONE_DIR"
  log "Fetching latest changes..."
  # The clone below is created with --depth=1 --branch, which makes it a
  # single-branch clone: a plain `git fetch origin` only updates the branch it
  # was cloned with, so `origin/$BRANCH` does not exist when a different
  # branch is requested on a later run. Fetch the requested ref explicitly
  # and move the local branch to exactly what was fetched.
  git fetch --depth=1 origin "$BRANCH"
  # --force discards local modifications, as the previous `reset --hard` did.
  git checkout --force -B "$BRANCH" FETCH_HEAD
else
  log "Cloning xet-core repository (branch: $BRANCH, depth: 1)..."
  git clone --depth=1 --branch="$BRANCH" "$REPO_URL" "$CLONE_DIR"
  cd "$CLONE_DIR"
fi

log "Repository cloned successfully. Current directory: $(pwd)"
143+
144+
# Install required Rust toolchain and components
145+
log "Setting up Rust toolchain..."

# NOTE: explicit toolchain provisioning is disabled in this script. The
# commented-out steps are kept here for reference:
#   log "Installing Rust nightly toolchain..."
#   rustup toolchain install nightly
#   log "Adding wasm32-unknown-unknown target..."
#   rustup target add wasm32-unknown-unknown --toolchain nightly
#   log "Adding rust-src component..."
#   rustup component add rust-src --toolchain nightly

# Install required tools
log "Installing wasm-pack and wasm-bindgen-cli..."

# Install a crate with cargo unless its binary can already be found.
# Arguments: $1 - binary name to look for
#            $2 - crate name passed to `cargo install` and used in the log line
install_cargo_tool_if_missing() {
  local binary=$1
  local crate=$2
  if command_exists "$binary"; then
    log "$crate already installed"
    return 0
  fi
  cargo install "$crate"
}

install_cargo_tool_if_missing wasm-pack wasm-pack
install_cargo_tool_if_missing wasm-bindgen wasm-bindgen-cli
172+
173+
# Change to the package directory
174+
log "Building WASM package: $PACKAGE"
cd "$PACKAGE"

# Set environment variable for JS target.
# NOTE(review): presumably read by the package's build_wasm.sh - confirm
# against the xet-core repository.
export JS_TARGET

# Build the WASM package
log "Starting WASM build (target: $JS_TARGET)..."

# Both packages are built the same way, through the build_wasm.sh that lives
# in their own directory, so no per-package branching is needed here.
if [[ ! -f build_wasm.sh ]]; then
  error "build_wasm.sh not found in $(pwd)"
fi
chmod +x build_wasm.sh
./build_wasm.sh

log "WASM build completed successfully!"
193+
194+
# Check if pkg directory exists (created by wasm-pack)
195+
if [[ -d "pkg" ]]; then
  log "Generated files in pkg directory:"
  ls -la pkg/

  # Copy to output directory if specified
  if [[ -n "$OUTPUT_DIR" ]]; then
    # The working directory is now <clone>/<package>, so a relative -o value
    # would land inside the clone. Resolve it against the directory the
    # script was started from, which is what the user meant.
    output_dest="$OUTPUT_DIR"
    if [[ "$output_dest" != /* ]]; then
      output_dest="$ORIGINAL_DIR/$output_dest"
    fi

    log "Copying WASM files to output directory: $OUTPUT_DIR"
    mkdir -p "$output_dest"
    cp -r pkg/* "$output_dest/"
    log "Files copied to $OUTPUT_DIR"
    log "Contents of output directory:"
    ls -la "$output_dest"
  fi
else
  warn "pkg directory not found. Build may have failed or used different output location."
fi
211+
212+
# Return to original directory
213+
cd "$ORIGINAL_DIR"

# Collect the summary lines first, then emit them through log in order.
# The output line is only included when -o/--output was given.
summary_lines=(
  "Build process completed!"
  "Built package: $PACKAGE"
  "Branch: $BRANCH"
  "JavaScript target: $JS_TARGET"
)
if [[ -n "$OUTPUT_DIR" ]]; then
  summary_lines+=("Output copied to: $OUTPUT_DIR")
fi

for summary_line in "${summary_lines[@]}"; do
  log "$summary_line"
done
222+
223+
# copy the generated hf_xet_thin_wasm_bg.js to the hub package and hf_xet_thin_wasm_bg.wasm to the hub package
224+
# Write a TypeScript module that embeds a WASM binary as base64 and exports
# the decoded bytes as `wasmBinary`.
# Arguments: $1 - path of the .wasm file to embed
#            $2 - path of the .ts file to (over)write
write_wasm_base64_module() {
  local wasm_file=$1
  local ts_file=$2
  {
    echo "// Generated by build-xet-wasm.sh"
    echo "export const wasmBase64 = atob(\`"
    # Read from stdin so it does not matter whether the local base64 wants a
    # file operand or -i. Drop whatever wrapping it applies and re-wrap at
    # 100 columns so the generated file is the same on every platform.
    base64 < "$wasm_file" | tr -d '\n' | fold -w 100
    # Make sure the closing backtick below starts on its own line.
    echo
    # The whitespace stripping must happen on the base64 TEXT, inside the
    # atob() call. Applied to the decoded string it would delete every 0x0A
    # byte of the binary and corrupt the module.
    cat << 'EOF'
`.trim().replaceAll("\n", ""));
const wasmBinary = new Uint8Array(wasmBase64.length);
for (let i = 0; i < wasmBase64.length; i++) {
	wasmBinary[i] = wasmBase64.charCodeAt(i);
}
export { wasmBinary };
EOF
  } > "$ts_file"
}

vendor_dir="./src/vendor/xet-chunk"
built_pkg_dir="$CLONE_DIR/$PACKAGE/pkg"

# The vendored chunker comes from the thin package only. The file names below
# do not exist in an hf_xet_wasm build, so copying unconditionally would make
# the script fail at the very end of an otherwise successful run.
if [[ "$PACKAGE" == "hf_xet_thin_wasm" ]]; then
  cp "$built_pkg_dir/hf_xet_thin_wasm_bg.js" "$vendor_dir/chunker_wasm_bg.js"
  write_wasm_base64_module \
    "$built_pkg_dir/hf_xet_thin_wasm_bg.wasm" \
    "$vendor_dir/chunker_wasm_bg.wasm.base64.ts"
else
  warn "Skipping update of $vendor_dir: it is only generated from hf_xet_thin_wasm (built: $PACKAGE)."
fi

echo -e "\n${GREEN}🎉 Success!${NC} Your WASM package is ready to use."

packages/hub/src/vendor/xet-chunk/chunker_wasm_bg.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ export function __wbg_length_e2d2a49132c1b256(arg0) {
346346
return ret;
347347
};
348348

349-
export function __wbg_log_31c4454272417045(arg0, arg1) {
349+
export function __wbg_log_c5d1a8dc098212af(arg0, arg1) {
350350
console.log(getStringFromWasm0(arg0, arg1));
351351
};
352352

0 commit comments

Comments
 (0)