diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..6cd3dec --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,98 @@ +name: Build and Deploy to GitHub Pages + +on: + push: + branches: + - main + pull_request: + branches: + - main + workflow_dispatch: + +concurrency: + group: pages-${{ github.ref }} + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + +jobs: + # Build the WASM package and web application + build-web: + name: Build Web Application + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-unknown-unknown + + - name: Install wasm-pack + run: cargo install wasm-pack + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20.x' + + - name: Cache cargo registry + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + wasm/target + key: ${{ runner.os }}-cargo-wasm-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-wasm- + + - name: Cache npm dependencies + uses: actions/cache@v4 + with: + path: web/node_modules + key: ${{ runner.os }}-npm-${{ hashFiles('web/package-lock.json') }} + restore-keys: | + ${{ runner.os }}-npm- + + - name: Build WASM package + env: + RUSTFLAGS: '--cfg getrandom_backend="wasm_js" -C target-feature=+bulk-memory,+mutable-globals,+simd128' + run: | + cd wasm + wasm-pack build --target web --out-dir ../web/src/pkg + + - name: Install npm dependencies + run: | + cd web + npm install + + - name: Build web application + run: | + cd web + npm run build + + - name: Upload artifact for GitHub Pages + if: github.event_name != 'pull_request' + uses: actions/upload-pages-artifact@v3 + with: + path: web/dist + + # Deploy to GitHub Pages + deploy: + name: Deploy to GitHub Pages + if: github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + needs: build-web + runs-on: ubuntu-latest 
+ permissions: + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 5289aa5..f66e43e 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,19 @@ doc/ # Log files *.log logs/ + +# Node.js / Web +node_modules/ +web/node_modules/ +web/dist/ +web/src/pkg/ +.npm +npm-debug.log* + +# WASM build artifacts +wasm/pkg/ +wasm/target/ +*.wasm + +# Server build artifacts +server/target/ diff --git a/Cargo.lock b/Cargo.lock index 871796e..d29beeb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,65 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + 
"windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -24,12 +83,146 @@ dependencies = [ "syn", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "axum" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +dependencies = [ + "axum-core", + "bytes", + "form_urlencoded", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + [[package]] name = "bytes" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clap" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", +] + [[package]] name = "futures-core" version = 
"0.3.31" @@ -37,19 +230,261 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] -name = "my-package" +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "http-range-header" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "9171a2ea8a68358193d15dd5d70c1c10a2afc3e7e4c5bc92bc9f025cebd7359c" + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", +] + +[[package]] +name = "hyper-util" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "hyper", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.178" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "model-in-browser" version = "0.1.0" dependencies = [ "tokio", "tokio-test", ] +[[package]] +name = "model-server" +version = "0.1.0" +dependencies = [ + "axum", + "clap", + "tokio", + "tower", + "tower-http", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + [[package]] name = "pin-project-lite" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "proc-macro2" version = "1.0.103" @@ -68,6 +503,125 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "ryu" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" + +[[package]] +name = "serde" +version = "1.0.228" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.148" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3084b546a1dd6289475996f182a22aba973866ea8e8b02c51d9f46b1336a22da" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.111" @@ -79,14 +633,34 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "tokio" version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ + "bytes", + "libc", + "mio", "pin-project-lite", + "socket2", "tokio-macros", + "windows-sys 0.61.2", ] [[package]] @@ -124,8 +698,256 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "tokio-util" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = 
"tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "http-range-header", + "httpdate", + "mime", + "mime_guess", + "percent-encoding", + "pin-project-lite", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = 
"tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicase" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" + [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "zmij" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f4a4e8e9dc5c62d159f04fcdbe07f4c3fb710415aab4754bf11505501e3251d" diff --git a/Cargo.toml b/Cargo.toml index 2da51af..69f63e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,22 +1,29 @@ +[workspace] +members = [".", "server"] +# Note: "wasm" is excluded from workspace as it requires wasm32 target +# Build it separately with: cd wasm && wasm-pack build --target web +exclude = ["wasm"] +resolver = "2" + [package] -name = "my-package" +name = "model-in-browser" version = "0.1.0" edition = "2021" -description = "A Rust package template for AI-driven development" +description = "Browser-based LLM inference using SmolLM2 and WebAssembly" readme = "README.md" license = "Unlicense" -keywords = ["template", "rust", "ai-driven"] -categories = ["development-tools"] -repository = "https://github.com/link-foundation/rust-ai-driven-development-pipeline-template" -documentation = "https://github.com/link-foundation/rust-ai-driven-development-pipeline-template" +keywords = ["llm", "wasm", "browser", "smollm2", "ai"] +categories = ["wasm", "web-programming"] +repository = "https://github.com/link-assistant/model-in-browser" +documentation = "https://github.com/link-assistant/model-in-browser" rust-version = "1.70" [lib] -name = "my_package" +name = "model_in_browser" path = "src/lib.rs" [[bin]] -name = "my-package" +name = "model-in-browser" path = "src/main.rs" [dependencies] diff --git a/README.md b/README.md index 920048d..0b6a4a8 100644 --- a/README.md +++ b/README.md @@ -1,286 +1,182 @@ -# rust-ai-driven-development-pipeline-template +# Model in Browser -A comprehensive template 
for AI-driven Rust development with full CI/CD pipeline support. +Run [SmolLM2](https://huggingface.co/collections/HuggingFaceTB/smollm2) language model directly in your web browser using WebAssembly - no server processing required! -[![CI/CD Pipeline](https://github.com/link-foundation/rust-ai-driven-development-pipeline-template/workflows/CI%2FCD%20Pipeline/badge.svg)](https://github.com/link-foundation/rust-ai-driven-development-pipeline-template/actions) -[![Rust Version](https://img.shields.io/badge/rust-1.70%2B-blue.svg)](https://www.rust-lang.org/) +[![CI/CD Pipeline](https://github.com/link-assistant/model-in-browser/workflows/CI%2FCD%20Pipeline/badge.svg)](https://github.com/link-assistant/model-in-browser/actions) +[![Deploy to GitHub Pages](https://github.com/link-assistant/model-in-browser/workflows/Build%20and%20Deploy%20to%20GitHub%20Pages/badge.svg)](https://github.com/link-assistant/model-in-browser/actions) [![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](http://unlicense.org/) ## Features -- **Rust stable support**: Works with Rust stable version -- **Cross-platform testing**: CI runs on Ubuntu, macOS, and Windows -- **Comprehensive testing**: Unit tests, integration tests, and doc tests -- **Code quality**: rustfmt + Clippy with pedantic lints -- **Pre-commit hooks**: Automated code quality checks before commits -- **CI/CD pipeline**: GitHub Actions with multi-platform support -- **Changelog management**: Fragment-based changelog (like Changesets/Scriv) -- **Release automation**: Automatic GitHub releases +- **100% Client-Side**: All AI inference happens in your browser - no data sent to servers +- **WebAssembly Powered**: Rust compiled to WASM for near-native performance +- **Web Worker**: Model runs in background thread for responsive UI +- **React Chat UI**: Modern chat interface using [@chatscope/chat-ui-kit-react](https://github.com/chatscope/chat-ui-kit-react) +- **SmolLM2-135M**: Compact 135M parameter model 
optimized for edge deployment +- **GitHub Pages Ready**: Deploy as a static site with no backend required -## Quick Start +## Demo -### Using This Template +Visit the [live demo](https://link-assistant.github.io/model-in-browser/) to try the model in your browser. -1. Click "Use this template" on GitHub to create a new repository -2. Clone your new repository -3. Update `Cargo.toml` with your package name and description -4. Rename the library and binary in `Cargo.toml` -5. Update imports in tests and examples -6. Build and start developing! +> **Note**: First load downloads ~270MB of model weights. The model is cached by your browser for subsequent visits. -### Development Setup +## Architecture -```bash -# Clone the repository -git clone https://github.com/link-foundation/rust-ai-driven-development-pipeline-template.git -cd rust-ai-driven-development-pipeline-template - -# Build the project -cargo build +``` +┌─────────────────────────────────────────────────────────────┐ +│ Browser │ +│ ┌─────────────────┐ ┌─────────────────────────────────┐ │ +│ │ React Chat │ │ Web Worker │ │ +│ │ UI (Main │◄──►│ ┌─────────────────────────┐ │ │ +│ │ Thread) │ │ │ WASM Module │ │ │ +│ │ │ │ │ ┌─────────────────┐ │ │ │ +│ │ @chatscope/ │ │ │ │ Candle (Rust) │ │ │ │ +│ │ chat-ui-kit │ │ │ │ SmolLM2-135M │ │ │ │ +│ └─────────────────┘ │ │ └─────────────────┘ │ │ │ +│ │ └─────────────────────────┘ │ │ +│ └─────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` -# Run tests -cargo test +## Quick Start -# Run the example binary -cargo run +### Prerequisites -# Run an example -cargo run --example basic_usage -``` +- [Rust](https://rustup.rs/) (1.70+) +- [wasm-pack](https://rustwasm.github.io/wasm-pack/installer/) +- [Node.js](https://nodejs.org/) (18+) -### Running Tests +### Development Setup ```bash -# Run all tests -cargo test - -# Run tests with verbose output -cargo test --verbose - -# Run doc tests -cargo test --doc +# Clone the 
repository +git clone https://github.com/link-assistant/model-in-browser.git +cd model-in-browser -# Run a specific test -cargo test test_add_positive_numbers +# Build the WASM package +./scripts/build-wasm.sh -# Run tests with output -cargo test -- --nocapture +# Install web dependencies and start dev server +cd web +npm install +npm run dev ``` -### Code Quality Checks +Open http://localhost:5173 in your browser. -```bash -# Format code -cargo fmt - -# Check formatting (CI style) -cargo fmt --check +### Build for Production -# Run Clippy lints -cargo clippy --all-targets --all-features +```bash +# Build WASM +cd wasm && wasm-pack build --target web --out-dir ../web/src/pkg -# Check file size limits -python3 scripts/check_file_size.py +# Build web app +cd web && npm run build -# Run all checks -cargo fmt --check && cargo clippy --all-targets --all-features && python3 scripts/check_file_size.py +# Serve with the Rust server +cargo run --manifest-path server/Cargo.toml -- --dir web/dist ``` ## Project Structure ``` . 
-├── .github/ -│ └── workflows/ -│ └── release.yml # CI/CD pipeline configuration -├── changelog.d/ # Changelog fragments -│ ├── README.md # Fragment instructions -│ └── *.md # Individual changelog entries -├── examples/ -│ └── basic_usage.rs # Usage examples -├── scripts/ -│ ├── bump_version.py # Version bumping utility -│ ├── check_file_size.py # File size validation script -│ ├── collect_changelog.py # Changelog collection script -│ ├── create_github_release.py # GitHub release creation -│ └── version_and_commit.py # CI/CD version management -├── src/ -│ ├── lib.rs # Library entry point -│ └── main.rs # Binary entry point -├── tests/ -│ └── integration_test.rs # Integration tests -├── .gitignore # Git ignore patterns -├── .pre-commit-config.yaml # Pre-commit hooks configuration -├── Cargo.toml # Project configuration -├── CHANGELOG.md # Project changelog -├── CONTRIBUTING.md # Contribution guidelines -├── LICENSE # Unlicense (public domain) -└── README.md # This file +├── wasm/ # Rust WASM library for model inference +│ ├── src/lib.rs # SmolLM2 WASM bindings +│ └── Cargo.toml # WASM package config +├── web/ # React web application +│ ├── src/ +│ │ ├── App.tsx # Main chat component +│ │ ├── worker.ts # Web Worker for inference +│ │ └── pkg/ # Built WASM package +│ ├── package.json +│ └── vite.config.ts +├── server/ # Local development server +│ └── src/main.rs # Axum server with CORS +├── .github/workflows/ +│ ├── release.yml # CI/CD pipeline +│ └── deploy.yml # GitHub Pages deployment +└── scripts/ + ├── build-wasm.sh # Build WASM package + └── dev.sh # Start development environment ``` -## Design Choices +## How It Works -### Code Quality Tools +1. **Model Loading**: When you click "Load Model", the web app downloads: + - Model weights (~270MB safetensors file) + - Tokenizer configuration + - Model configuration -- **rustfmt**: Standard Rust code formatter - - Ensures consistent code style across the project - - Configured to run on all Rust files +2. 
**Web Worker**: The WASM module runs in a Web Worker to keep the UI responsive during inference. -- **Clippy**: Rust linter with comprehensive checks - - Pedantic and nursery lints enabled for strict code quality - - Catches common mistakes and suggests improvements - - Enforces best practices +3. **Text Generation**: The model uses the LLaMA architecture implemented in [Candle](https://github.com/huggingface/candle), HuggingFace's minimalist ML framework for Rust. -- **Pre-commit hooks**: Automated checks before each commit - - Runs rustfmt to ensure formatting - - Runs Clippy to catch issues early - - Runs tests to prevent broken commits +4. **Streaming Output**: Tokens are generated one at a time and streamed to the chat UI for real-time response display. -### Testing Strategy +## Technology Stack -The template supports multiple levels of testing: +- **Inference Engine**: [Candle](https://github.com/huggingface/candle) - Rust ML framework with WASM support +- **Model**: [SmolLM2-135M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct) +- **Frontend**: React 18 with TypeScript +- **Chat UI**: [@chatscope/chat-ui-kit-react](https://chatscope.io/) +- **Build Tool**: Vite +- **WASM Toolchain**: wasm-pack, wasm-bindgen -- **Unit tests**: In `src/lib.rs` using `#[cfg(test)]` modules -- **Integration tests**: In `tests/` directory -- **Doc tests**: In documentation examples using `///` comments -- **Examples**: In `examples/` directory (also serve as documentation) +## Browser Requirements -### Changelog Management +- Modern browser with WebAssembly support +- ~512MB free memory for model loading +- Chrome, Firefox, Safari, or Edge (latest versions) -This template uses a fragment-based changelog system similar to: -- [Changesets](https://github.com/changesets/changesets) (JavaScript) -- [Scriv](https://scriv.readthedocs.io/) (Python) +## Development -Benefits: -- **No merge conflicts**: Multiple PRs can add fragments without conflicts -- **Per-PR 
documentation**: Each PR documents its own changes -- **Automated collection**: Fragments are collected during release -- **Consistent format**: Template ensures consistent changelog entries +### Running Tests ```bash -# Create a changelog fragment -touch changelog.d/$(date +%Y%m%d_%H%M%S)_my_change.md - -# Edit the fragment to document your changes -``` - -### CI/CD Pipeline - -The GitHub Actions workflow provides: - -1. **Linting**: rustfmt and Clippy checks -2. **Changelog check**: Warns if PRs are missing changelog fragments -3. **Test matrix**: 3 OS (Ubuntu, macOS, Windows) with Rust stable -4. **Building**: Release build and package validation -5. **Release**: Automated GitHub releases when version changes - -### Release Automation - -The release workflow supports: - -- **Auto-release**: Automatically creates releases when version in Cargo.toml changes -- **Manual release**: Trigger releases via workflow_dispatch with version bump type -- **Changelog collection**: Automatically collects fragments during release -- **GitHub releases**: Automatic creation with CHANGELOG content - -## Configuration - -### Updating Package Name - -After creating a repository from this template: - -1. Update `Cargo.toml`: - - Change `name` field - - Update `repository` and `documentation` URLs - - Change `[lib]` and `[[bin]]` names - -2. Rename the crate in imports: - - `tests/integration_test.rs` - - `examples/basic_usage.rs` - - `src/main.rs` - -### Clippy Configuration - -Clippy is configured in `Cargo.toml` under `[lints.clippy]`: - -- Pedantic lints enabled for strict code quality -- Nursery lints enabled for additional checks -- Some common patterns allowed (e.g., `module_name_repetitions`) - -### rustfmt Configuration - -Uses default rustfmt settings. 
To customize, create a `rustfmt.toml`: +# Rust tests +cargo test -```toml -edition = "2021" -max_width = 100 -tab_spaces = 4 +# Web tests +cd web && npm test ``` -## Scripts Reference - -| Script | Description | -| ----------------------------------- | ------------------------------ | -| `cargo test` | Run all tests | -| `cargo fmt` | Format code | -| `cargo clippy` | Run lints | -| `cargo run --example basic_usage` | Run example | -| `python3 scripts/check_file_size.py`| Check file size limits | -| `python3 scripts/bump_version.py` | Bump version | +### Code Quality -## Example Usage - -```rust -use my_package::{add, multiply, delay}; - -fn main() { - // Basic arithmetic - let sum = add(2, 3); // 5 - let product = multiply(2, 3); // 6 +```bash +# Format Rust code +cargo fmt - println!("2 + 3 = {sum}"); - println!("2 * 3 = {product}"); -} +# Run Clippy lints +cargo clippy --all-targets --all-features -// Async operations -#[tokio::main] -async fn main() { - delay(1.0).await; // Wait for 1 second -} +# Lint web code +cd web && npm run lint ``` -See `examples/basic_usage.rs` for more examples. - ## Contributing Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. -### Development Workflow - 1. Fork the repository -2. Create a feature branch: `git checkout -b feature/my-feature` -3. Make your changes and add tests -4. Run quality checks: `cargo fmt && cargo clippy && cargo test` -5. Add a changelog fragment -6. Commit your changes (pre-commit hooks will run automatically) -7. Push and create a Pull Request +2. Create a feature branch +3. Make your changes with tests +4. Add a changelog fragment in `changelog.d/` +5. Submit a pull request ## License [Unlicense](LICENSE) - Public Domain -This is free and unencumbered software released into the public domain. See [LICENSE](LICENSE) for details. 
- ## Acknowledgments -Inspired by: -- [js-ai-driven-development-pipeline-template](https://github.com/link-foundation/js-ai-driven-development-pipeline-template) -- [python-ai-driven-development-pipeline-template](https://github.com/link-foundation/python-ai-driven-development-pipeline-template) +- [HuggingFace](https://huggingface.co/) for SmolLM2 and Candle +- [Candle](https://github.com/huggingface/candle) team for the WASM-compatible ML framework +- [chatscope](https://chatscope.io/) for the React chat UI components ## Resources -- [Rust Book](https://doc.rust-lang.org/book/) -- [Cargo Book](https://doc.rust-lang.org/cargo/) -- [Clippy Documentation](https://rust-lang.github.io/rust-clippy/) -- [rustfmt Documentation](https://rust-lang.github.io/rustfmt/) -- [Pre-commit Documentation](https://pre-commit.com/) +- [SmolLM2 Collection](https://huggingface.co/collections/HuggingFaceTB/smollm2) +- [Candle WASM Examples](https://github.com/huggingface/candle/tree/main/candle-wasm-examples) +- [WebAssembly Rust Guide](https://rustwasm.github.io/docs/book/) diff --git a/changelog.d/20251229_smollm2_browser_inference.md b/changelog.d/20251229_smollm2_browser_inference.md new file mode 100644 index 0000000..6f0f6b8 --- /dev/null +++ b/changelog.d/20251229_smollm2_browser_inference.md @@ -0,0 +1,8 @@ +### Added +- SmolLM2 language model running entirely in browser via WebAssembly +- Rust WASM library using Candle ML framework for model inference +- Web Worker for background model processing to keep UI responsive +- React chat UI using @chatscope/chat-ui-kit-react +- Local Rust development server with CORS support +- GitHub Pages deployment workflow +- Support for streaming token generation diff --git a/examples/basic_usage.rs b/examples/basic_usage.rs index bdbd1fc..afa5e65 100644 --- a/examples/basic_usage.rs +++ b/examples/basic_usage.rs @@ -1,10 +1,10 @@ -//! Basic usage example for my-package. +//! Basic usage example for model-in-browser. //! //! 
This example demonstrates the basic functionality of the package. //! //! Run with: `cargo run --example basic_usage` -use my_package::{add, delay, multiply}; +use model_in_browser::{add, delay, multiply}; #[tokio::main] async fn main() { diff --git a/scripts/build-wasm.sh b/scripts/build-wasm.sh new file mode 100755 index 0000000..87dbb17 --- /dev/null +++ b/scripts/build-wasm.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Build the WASM package for browser use + +set -e + +echo "Building SmolLM2 WASM package..." + +cd "$(dirname "$0")/../wasm" + +# Check if wasm-pack is installed +if ! command -v wasm-pack &> /dev/null; then + echo "Error: wasm-pack is not installed." + echo "Install it with: cargo install wasm-pack" + exit 1 +fi + +# Build for web target +wasm-pack build --target web --out-dir ../web/src/pkg + +echo "WASM package built successfully!" +echo "Output: web/src/pkg/" diff --git a/scripts/dev.sh b/scripts/dev.sh new file mode 100755 index 0000000..ec912c0 --- /dev/null +++ b/scripts/dev.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Start the development environment + +set -e + +cd "$(dirname "$0")/.." + +echo "Starting development environment..." + +# Build WASM if not already built +if [ ! -d "web/src/pkg" ]; then + echo "Building WASM package..." + ./scripts/build-wasm.sh +fi + +# Install npm dependencies if needed +if [ ! -d "web/node_modules" ]; then + echo "Installing npm dependencies..." + cd web && npm install && cd .. +fi + +# Start the Vite dev server +echo "Starting Vite dev server..." 
+cd web && npm run dev diff --git a/server/Cargo.toml b/server/Cargo.toml new file mode 100644 index 0000000..4980a21 --- /dev/null +++ b/server/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "model-server" +version = "0.1.0" +edition = "2021" +description = "Local development server for the browser-based LLM demo" +license = "Unlicense" +repository = "https://github.com/link-assistant/model-in-browser" + +[[bin]] +name = "model-server" +path = "src/main.rs" + +[dependencies] +axum = "0.8" +tokio = { version = "1.0", features = ["rt-multi-thread", "macros", "fs"] } +tower = "0.5" +tower-http = { version = "0.6", features = ["cors", "fs"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +clap = { version = "4.5", features = ["derive"] } + +[lints.rust] +unsafe_code = "forbid" + +[lints.clippy] +all = { level = "warn", priority = -1 } +pedantic = { level = "warn", priority = -1 } +nursery = { level = "warn", priority = -1 } +module_name_repetitions = "allow" diff --git a/server/src/main.rs b/server/src/main.rs new file mode 100644 index 0000000..b242cd5 --- /dev/null +++ b/server/src/main.rs @@ -0,0 +1,98 @@ +//! Local development server for the browser-based LLM demo. +//! +//! This server provides: +//! - Static file serving for the web application +//! - CORS headers for local development +//! - Cross-Origin-Opener-Policy and Cross-Origin-Embedder-Policy headers +//! required for SharedArrayBuffer (used by some WASM features) + +use axum::Router; +use clap::Parser; +use std::net::SocketAddr; +use std::path::PathBuf; +use tower_http::{ + cors::{Any, CorsLayer}, + services::ServeDir, +}; +use tracing::{info, Level}; +use tracing_subscriber::FmtSubscriber; + +/// Local development server for the browser-based LLM demo. 
+#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Port to listen on + #[arg(short, long, default_value_t = 8080)] + port: u16, + + /// Directory to serve static files from + #[arg(short, long, default_value = "../web/dist")] + dir: PathBuf, + + /// Enable verbose logging + #[arg(short, long)] + verbose: bool, +} + +#[tokio::main] +async fn main() { + let args = Args::parse(); + + // Initialize logging + let log_level = if args.verbose { + Level::DEBUG + } else { + Level::INFO + }; + + let subscriber = FmtSubscriber::builder().with_max_level(log_level).finish(); + + tracing::subscriber::set_global_default(subscriber).expect("Failed to set tracing subscriber"); + + // Verify the static directory exists + if !args.dir.exists() { + eprintln!( + "Error: Static directory '{}' does not exist.", + args.dir.display() + ); + eprintln!("Make sure to build the web application first:"); + eprintln!(" cd ../web && npm run build"); + std::process::exit(1); + } + + // Setup CORS for local development + let cors = CorsLayer::new() + .allow_origin(Any) + .allow_methods(Any) + .allow_headers(Any); + + // Create the router with static file serving + let app = Router::new() + .nest_service("/", ServeDir::new(&args.dir)) + .layer(cors) + .layer(tower::ServiceBuilder::new().map_response( + |mut response: axum::response::Response| { + // Add headers required for SharedArrayBuffer + response + .headers_mut() + .insert("Cross-Origin-Opener-Policy", "same-origin".parse().unwrap()); + response.headers_mut().insert( + "Cross-Origin-Embedder-Policy", + "require-corp".parse().unwrap(), + ); + response + }, + )); + + let addr = SocketAddr::from(([127, 0, 0, 1], args.port)); + + info!("Starting server at http://{}", addr); + info!("Serving files from: {}", args.dir.display()); + info!("Press Ctrl+C to stop"); + + let listener = tokio::net::TcpListener::bind(addr) + .await + .expect("Failed to bind to address"); + + axum::serve(listener, 
app).await.expect("Server failed"); +} diff --git a/src/lib.rs b/src/lib.rs index ff6e523..ba77f6b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ -//! Example module entry point. +//! Browser-based LLM inference library using `SmolLM2` and WebAssembly. //! -//! Replace this with your actual implementation. +//! This crate provides utilities for running the `SmolLM2` language model +//! directly in web browsers via WebAssembly, with no server-side processing. /// Package version (matches Cargo.toml version). pub const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -19,7 +20,7 @@ pub const VERSION: &str = env!("CARGO_PKG_VERSION"); /// # Examples /// /// ``` -/// use my_package::add; +/// use model_in_browser::add; /// assert_eq!(add(2, 3), 5); /// ``` #[must_use] @@ -41,7 +42,7 @@ pub const fn add(a: i64, b: i64) -> i64 { /// # Examples /// /// ``` -/// use my_package::multiply; +/// use model_in_browser::multiply; /// assert_eq!(multiply(2, 3), 6); /// ``` #[must_use] @@ -58,7 +59,7 @@ pub const fn multiply(a: i64, b: i64) -> i64 { /// # Examples /// /// ``` -/// use my_package::delay; +/// use model_in_browser::delay; /// /// #[tokio::main] /// async fn main() { diff --git a/src/main.rs b/src/main.rs index bffc9ba..dd4d728 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,29 +1,33 @@ -//! Example binary entry point. +//! CLI for the browser-based LLM project. //! -//! This is a simple CLI that demonstrates the library functionality. +//! This binary provides information about the project and can be used +//! for local testing. 
-use my_package::{add, delay, multiply}; +use model_in_browser::{add, delay, multiply}; #[tokio::main] async fn main() { - println!("my-package v{}", my_package::VERSION); + println!("Model in Browser v{}", model_in_browser::VERSION); + println!(); + println!("This project enables running SmolLM2 language model"); + println!("directly in web browsers via WebAssembly."); println!(); - // Example 1: Basic arithmetic - println!("Example 1: Basic arithmetic"); - println!("2 + 3 = {}", add(2, 3)); - println!("2 * 3 = {}", multiply(2, 3)); + // Quick functionality test + println!("Quick self-test:"); + println!(" 2 + 3 = {}", add(2, 3)); + println!(" 2 * 3 = {}", multiply(2, 3)); println!(); - // Example 2: Working with larger numbers - println!("Example 2: Working with larger numbers"); - println!("1000 + 2000 = {}", add(1000, 2000)); - println!("100 * 200 = {}", multiply(100, 200)); + println!("Testing async functionality..."); + delay(0.5).await; + println!("Async test complete!"); println!(); - // Example 3: Async delay - println!("Example 3: Async delay"); - println!("Waiting for 1 second..."); - delay(1.0).await; - println!("Done!"); + println!("To start the web application:"); + println!(" 1. Build the WASM package: ./scripts/build-wasm.sh"); + println!(" 2. Start the dev server: cd web && npm run dev"); + println!(); + println!("For the production server:"); + println!(" cargo run --manifest-path server/Cargo.toml"); } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 89fa538..ec462c1 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,8 +1,8 @@ -//! Integration tests for my-package. +//! Integration tests for model-in-browser. //! //! These tests verify the public API works correctly. 
-use my_package::{add, delay, multiply}; +use model_in_browser::{add, delay, multiply}; mod add_integration_tests { use super::*; @@ -73,7 +73,7 @@ mod delay_integration_tests { } mod version_tests { - use my_package::VERSION; + use model_in_browser::VERSION; #[test] fn test_version_is_not_empty() { diff --git a/wasm/.cargo/config.toml b/wasm/.cargo/config.toml new file mode 100644 index 0000000..851d929 --- /dev/null +++ b/wasm/.cargo/config.toml @@ -0,0 +1,11 @@ +# Configuration for building the WASM package +# This is required for getrandom to work on wasm32-unknown-unknown target + +[target.wasm32-unknown-unknown] +rustflags = [ + '--cfg', 'getrandom_backend="wasm_js"', + '-C', 'target-feature=+bulk-memory,+mutable-globals,+simd128' +] + +[build] +target = "wasm32-unknown-unknown" diff --git a/wasm/Cargo.toml b/wasm/Cargo.toml new file mode 100644 index 0000000..c6fadd8 --- /dev/null +++ b/wasm/Cargo.toml @@ -0,0 +1,72 @@ +# This package is intentionally NOT part of the parent workspace +# because it requires the wasm32-unknown-unknown target +[workspace] + +[package] +name = "smollm2-wasm" +version = "0.1.0" +edition = "2021" +description = "SmolLM2 inference compiled to WebAssembly for browser-based LLM" +license = "Unlicense" +repository = "https://github.com/link-assistant/model-in-browser" + +[lib] +crate-type = ["cdylib", "rlib"] + +[dependencies] +wasm-bindgen = "0.2" +wasm-bindgen-futures = "0.4" +js-sys = "0.3" +web-sys = { version = "0.3", features = [ + "console", + "Window", + "Performance", + "WorkerGlobalScope", +] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +serde-wasm-bindgen = "0.6" +console_error_panic_hook = "0.1" + +# Random number generation for WASM +# getrandom 0.2.x is used by some dependencies, 0.3.x by others +# Both need JS feature enabled for wasm32-unknown-unknown +getrandom_02 = { package = "getrandom", version = "0.2", features = ["js"] } +getrandom = { version = "0.3", features = ["wasm_js"] } + +# 
Candle ML framework +candle-core = { version = "0.8", default-features = false } +candle-nn = { version = "0.8", default-features = false } +candle-transformers = { version = "0.8", default-features = false } + +# Tokenizer +tokenizers = { version = "0.20", default-features = false, features = ["unstable_wasm"] } + +[dev-dependencies] +wasm-bindgen-test = "0.3" + +[profile.release] +lto = true +opt-level = "z" +codegen-units = 1 +strip = true + +# Configure wasm-opt to enable required WASM features +# Required since Rust 1.87+ / LLVM 20 generates modern WASM features by default: +# - bulk-memory: For memory.copy, memory.fill operations +# - mutable-globals: For mutable global variables +# - simd: For SIMD vector operations (used by candle) +# - nontrapping-float-to-int: For saturating float conversions (i32.trunc_sat_*) +# - sign-ext: For sign extension operations +# - reference-types: For reference type operations +[package.metadata.wasm-pack.profile.release] +wasm-opt = ["-O", "--enable-bulk-memory", "--enable-mutable-globals", "--enable-simd", "--enable-nontrapping-float-to-int", "--enable-sign-ext", "--enable-reference-types"] + +[package.metadata.wasm-pack.profile.dev] +wasm-opt = false + +[lints.rust] +unsafe_code = "warn" + +[lints.clippy] +all = { level = "warn", priority = -1 } diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs new file mode 100644 index 0000000..fb894d1 --- /dev/null +++ b/wasm/src/lib.rs @@ -0,0 +1,322 @@ +//! SmolLM2 WASM inference library for browser-based LLM. +//! +//! This module provides WebAssembly bindings for running SmolLM2-135M +//! language model inference directly in the browser without server processing. 
+ +use candle_core::{Device, Tensor}; +use candle_nn::VarBuilder; +use candle_transformers::generation::LogitsProcessor; +use candle_transformers::models::llama::{Cache, Config, Llama, LlamaConfig}; +use serde::{Deserialize, Serialize}; +use std::sync::Mutex; +use tokenizers::Tokenizer; +use wasm_bindgen::prelude::*; + +// Console logging for debugging +#[wasm_bindgen] +extern "C" { + #[wasm_bindgen(js_namespace = console)] + fn log(s: &str); + #[wasm_bindgen(js_namespace = console)] + fn error(s: &str); +} + +/// Macro for console logging in WASM context. +macro_rules! console_log { + ($($t:tt)*) => (log(&format!($($t)*))) +} + +/// Generation parameters for text completion. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GenerationParams { + /// Maximum number of tokens to generate. + pub max_tokens: usize, + /// Temperature for sampling (0.0 = greedy, higher = more random). + pub temperature: f64, + /// Top-p (nucleus) sampling threshold. + pub top_p: f64, + /// Repeat penalty to avoid repetitive text. + pub repeat_penalty: f32, + /// Number of tokens to consider for repeat penalty. + pub repeat_last_n: usize, + /// Random seed for reproducibility. + pub seed: u64, +} + +impl Default for GenerationParams { + fn default() -> Self { + Self { + max_tokens: 256, + temperature: 0.7, + top_p: 0.9, + repeat_penalty: 1.1, + repeat_last_n: 64, + seed: 42, + } + } +} + +/// SmolLM2 model wrapper for WASM. +pub struct SmolLM2Model { + model: Llama, + tokenizer: Tokenizer, + config: Config, + device: Device, + cache: Cache, +} + +/// Static model storage for the worker context. +static MODEL: Mutex<Option<SmolLM2Model>> = Mutex::new(None); + +/// Initialize panic hook for better error messages. +#[wasm_bindgen(start)] +pub fn init_panic_hook() { + console_error_panic_hook::set_once(); +} + +/// Get the library version. +#[wasm_bindgen] +pub fn get_version() -> String { + env!("CARGO_PKG_VERSION").to_string() +} + +/// Check if model is loaded. 
+#[wasm_bindgen] +pub fn is_model_loaded() -> bool { + MODEL.lock().map(|m| m.is_some()).unwrap_or(false) +} + +/// Load the SmolLM2 model from provided weights and tokenizer data. +/// +/// # Arguments +/// * `model_weights` - The model weights as a byte array (safetensors format) +/// * `tokenizer_json` - The tokenizer configuration as JSON string +/// * `config_json` - The model configuration as JSON string +#[wasm_bindgen] +pub async fn load_model( + model_weights: &[u8], + tokenizer_json: &str, + config_json: &str, +) -> Result<(), JsValue> { + console_log!("SmolLM2: Starting model load..."); + + // Parse config + let config: LlamaConfig = serde_json::from_str(config_json) + .map_err(|e| JsValue::from_str(&format!("Failed to parse config: {}", e)))?; + + let config = config.into_config(false); + + console_log!( + "SmolLM2: Config loaded - vocab_size: {}, hidden_size: {}", + config.vocab_size, + config.hidden_size + ); + + // Initialize tokenizer + let tokenizer = Tokenizer::from_bytes(tokenizer_json.as_bytes()) + .map_err(|e| JsValue::from_str(&format!("Failed to load tokenizer: {}", e)))?; + + console_log!("SmolLM2: Tokenizer loaded"); + + // Use CPU device for WASM + let device = Device::Cpu; + + // Load model weights using safetensors + let tensors = candle_core::safetensors::load_buffer(model_weights, &device) + .map_err(|e| JsValue::from_str(&format!("Failed to load weights: {}", e)))?; + + let vb = VarBuilder::from_tensors(tensors, candle_core::DType::F32, &device); + + // Build the model + let model = Llama::load(vb, &config) + .map_err(|e| JsValue::from_str(&format!("Failed to build model: {}", e)))?; + + console_log!("SmolLM2: Model built successfully"); + + // Create KV cache for efficient generation + let cache = Cache::new(true, candle_core::DType::F32, &config, &device) + .map_err(|e| JsValue::from_str(&format!("Failed to create cache: {}", e)))?; + + // Store model in global state + let smol_model = SmolLM2Model { + model, + tokenizer, + config, 
+ device, + cache, + }; + + *MODEL + .lock() + .map_err(|e| JsValue::from_str(&format!("Lock error: {}", e)))? = Some(smol_model); + + console_log!("SmolLM2: Model ready for inference"); + Ok(()) +} + +/// Generate text from a prompt. +/// +/// # Arguments +/// * `prompt` - The input prompt text +/// * `params_json` - Generation parameters as JSON (optional, uses defaults if empty) +/// * `callback` - JavaScript callback function called for each generated token +/// +/// # Returns +/// The complete generated text +#[wasm_bindgen] +pub async fn generate( + prompt: &str, + params_json: &str, + callback: js_sys::Function, +) -> Result<String, JsValue> { + let params: GenerationParams = if params_json.is_empty() { + GenerationParams::default() + } else { + serde_json::from_str(params_json) + .map_err(|e| JsValue::from_str(&format!("Invalid params: {}", e)))? + }; + + console_log!( + "SmolLM2: Generating with max_tokens={}, temp={}", + params.max_tokens, + params.temperature + ); + + let mut model_guard = MODEL + .lock() + .map_err(|e| JsValue::from_str(&format!("Lock error: {}", e)))?; + + let model_wrapper = model_guard + .as_mut() + .ok_or_else(|| JsValue::from_str("Model not loaded"))?; + + // Tokenize the prompt + let encoding = model_wrapper + .tokenizer + .encode(prompt, true) + .map_err(|e| JsValue::from_str(&format!("Tokenization failed: {}", e)))?; + + let tokens: Vec<u32> = encoding.get_ids().to_vec(); + let prompt_len = tokens.len(); + + console_log!("SmolLM2: Prompt tokenized to {} tokens", prompt_len); + + // Setup logits processor for sampling + let mut logits_processor = + LogitsProcessor::new(params.seed, Some(params.temperature), Some(params.top_p)); + + let mut generated_text = String::new(); + let mut all_tokens = tokens.clone(); + + // Get EOS token ID + let eos_token_id = model_wrapper + .tokenizer + .token_to_id("</s>") + .or_else(|| model_wrapper.tokenizer.token_to_id("<|endoftext|>")) + .unwrap_or(2); + + // Generation loop + for i in 0..params.max_tokens { + let 
context_size = if i == 0 { tokens.len() } else { 1 }; + let start_pos = all_tokens.len().saturating_sub(context_size); + + let input_tokens = &all_tokens[start_pos..]; + let input_tensor = Tensor::new(input_tokens, &model_wrapper.device) + .map_err(|e| JsValue::from_str(&format!("Failed to create input tensor: {}", e)))?; + let input_tensor = input_tensor + .unsqueeze(0) + .map_err(|e| JsValue::from_str(&format!("Failed to unsqueeze: {}", e)))?; + + // Forward pass with cache + let logits = model_wrapper + .model + .forward(&input_tensor, start_pos, &mut model_wrapper.cache) + .map_err(|e| JsValue::from_str(&format!("Forward pass failed: {}", e)))?; + + // Get logits for next token prediction + let logits = logits + .squeeze(0) + .map_err(|e| JsValue::from_str(&format!("Squeeze failed: {}", e)))?; + + let seq_len = logits + .dim(0) + .map_err(|e| JsValue::from_str(&format!("Failed to get dim: {}", e)))?; + + let logits = logits + .get(seq_len - 1) + .map_err(|e| JsValue::from_str(&format!("Get logits failed: {}", e)))?; + + // Apply repeat penalty + let logits = if params.repeat_penalty != 1.0 { + let start_at = all_tokens.len().saturating_sub(params.repeat_last_n); + candle_transformers::utils::apply_repeat_penalty( + &logits, + params.repeat_penalty, + &all_tokens[start_at..], + ) + .map_err(|e| JsValue::from_str(&format!("Repeat penalty failed: {}", e)))? 
+ } else { + logits + }; + + // Sample next token + let next_token = logits_processor + .sample(&logits) + .map_err(|e| JsValue::from_str(&format!("Sampling failed: {}", e)))?; + + // Check for EOS + if next_token == eos_token_id { + console_log!("SmolLM2: EOS token reached"); + break; + } + + all_tokens.push(next_token); + + // Decode the new token + if let Ok(text) = model_wrapper.tokenizer.decode(&[next_token], false) { + generated_text.push_str(&text); + + // Call the callback with the new token + let this = JsValue::NULL; + let token_js = JsValue::from_str(&text); + let _ = callback.call1(&this, &token_js); + } + } + + console_log!( + "SmolLM2: Generation complete, {} tokens generated", + all_tokens.len() - prompt_len + ); + + Ok(generated_text) +} + +/// Clear the loaded model from memory. +#[wasm_bindgen] +pub fn clear_model() -> Result<(), JsValue> { + *MODEL + .lock() + .map_err(|e| JsValue::from_str(&format!("Lock error: {}", e)))? = None; + console_log!("SmolLM2: Model cleared from memory"); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_params() { + let params = GenerationParams::default(); + assert_eq!(params.max_tokens, 256); + assert!((params.temperature - 0.7).abs() < f64::EPSILON); + } + + #[test] + fn test_params_serialization() { + let params = GenerationParams::default(); + let json = serde_json::to_string(¶ms).unwrap(); + let parsed: GenerationParams = serde_json::from_str(&json).unwrap(); + assert_eq!(params.max_tokens, parsed.max_tokens); + } +} diff --git a/web/index.html b/web/index.html new file mode 100644 index 0000000..d90150b --- /dev/null +++ b/web/index.html @@ -0,0 +1,14 @@ + + + + + + + + SmolLM2 in Browser - AI Chat + + +
+ + + diff --git a/web/package.json b/web/package.json new file mode 100644 index 0000000..94a3e2f --- /dev/null +++ b/web/package.json @@ -0,0 +1,45 @@ +{ + "name": "model-in-browser", + "version": "0.1.0", + "description": "SmolLM2 language model running in the browser via WebAssembly", + "private": true, + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc && vite build", + "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", + "preview": "vite preview", + "test": "vitest" + }, + "dependencies": { + "@chatscope/chat-ui-kit-react": "^2.1.1", + "@chatscope/chat-ui-kit-styles": "^1.4.0", + "react": "^18.3.1", + "react-dom": "^18.3.1" + }, + "devDependencies": { + "@types/react": "^18.3.14", + "@types/react-dom": "^18.3.2", + "@typescript-eslint/eslint-plugin": "^8.18.0", + "@typescript-eslint/parser": "^8.18.0", + "@vitejs/plugin-react": "^4.3.4", + "eslint": "^9.17.0", + "eslint-plugin-react-hooks": "^5.0.0", + "eslint-plugin-react-refresh": "^0.4.16", + "typescript": "^5.7.2", + "vite": "^6.0.5", + "vitest": "^2.1.8" + }, + "browserslist": { + "production": [ + ">0.2%", + "not dead", + "not op_mini all" + ], + "development": [ + "last 1 chrome version", + "last 1 firefox version", + "last 1 safari version" + ] + } +} diff --git a/web/public/brain.svg b/web/public/brain.svg new file mode 100644 index 0000000..ee7cbc4 --- /dev/null +++ b/web/public/brain.svg @@ -0,0 +1,4 @@ + + + + diff --git a/web/src/App.tsx b/web/src/App.tsx new file mode 100644 index 0000000..e9e388c --- /dev/null +++ b/web/src/App.tsx @@ -0,0 +1,252 @@ +import { useState, useCallback, useRef, useEffect } from 'react'; +import '@chatscope/chat-ui-kit-styles/dist/default/styles.min.css'; +import { + MainContainer, + ChatContainer, + MessageList, + Message, + MessageInput, + TypingIndicator, + MessageModel, +} from '@chatscope/chat-ui-kit-react'; +import type { WorkerMessage, LoadPayload, GeneratePayload } from './worker'; + +// Model configuration 
+const MODEL_CONFIG = { + // Using SmolLM2-135M-Instruct from HuggingFace + modelUrl: + 'https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct/resolve/main/model.safetensors', + tokenizerUrl: + 'https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct/resolve/main/tokenizer.json', + configUrl: + 'https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct/resolve/main/config.json', +}; + +type ModelStatus = 'idle' | 'loading' | 'ready' | 'error'; + +interface ProgressInfo { + label: string; + loaded: number; + total: number; + progress: number; +} + +function App() { + const [messages, setMessages] = useState([ + { + message: + "Hello! I'm SmolLM2, a small language model running entirely in your browser. Load me to start chatting!", + sentTime: 'just now', + sender: 'SmolLM2', + direction: 'incoming', + position: 'single', + }, + ]); + const [status, setStatus] = useState('idle'); + const [statusText, setStatusText] = useState('Model not loaded'); + const [isTyping, setIsTyping] = useState(false); + const [progress, setProgress] = useState(null); + + const workerRef = useRef(null); + const currentResponseRef = useRef(''); + + // Initialize the worker + useEffect(() => { + // Create worker from worker.ts + const worker = new Worker(new URL('./worker.ts', import.meta.url), { + type: 'module', + }); + + worker.onmessage = (event: MessageEvent) => { + const { type, payload } = event.data; + + switch (type) { + case 'status': + setStatusText(payload as string); + break; + + case 'progress': + setProgress(payload as ProgressInfo); + break; + + case 'token': + // Append token to current response + currentResponseRef.current += payload as string; + // Update the last message with the streaming response + setMessages((prev) => { + const updated = [...prev]; + const lastIdx = updated.length - 1; + if (lastIdx >= 0 && updated[lastIdx].sender === 'SmolLM2') { + updated[lastIdx] = { + ...updated[lastIdx], + message: currentResponseRef.current, + }; + } + return updated; + 
}); + break; + + case 'complete': { + const action = (payload as { action: string }).action; + if (action === 'load') { + setStatus('ready'); + setStatusText('Model ready'); + setProgress(null); + } else if (action === 'generate') { + setIsTyping(false); + } + break; + } + + case 'error': + setStatus('error'); + setStatusText(`Error: ${payload}`); + setIsTyping(false); + setProgress(null); + break; + } + }; + + workerRef.current = worker; + + return () => { + worker.terminate(); + }; + }, []); + + // Load the model + const handleLoadModel = useCallback(() => { + if (!workerRef.current || status === 'loading' || status === 'ready') return; + + setStatus('loading'); + setStatusText('Initializing...'); + + const loadPayload: LoadPayload = { + modelUrl: MODEL_CONFIG.modelUrl, + tokenizerUrl: MODEL_CONFIG.tokenizerUrl, + configUrl: MODEL_CONFIG.configUrl, + }; + + workerRef.current.postMessage({ type: 'load', payload: loadPayload }); + }, [status]); + + // Send a message + const handleSend = useCallback( + (text: string) => { + if (!workerRef.current || status !== 'ready' || isTyping) return; + + // Add user message + const userMessage: MessageModel = { + message: text, + sentTime: 'just now', + sender: 'You', + direction: 'outgoing', + position: 'single', + }; + + // Add placeholder for AI response + const aiPlaceholder: MessageModel = { + message: '', + sentTime: 'just now', + sender: 'SmolLM2', + direction: 'incoming', + position: 'single', + }; + + setMessages((prev) => [...prev, userMessage, aiPlaceholder]); + setIsTyping(true); + currentResponseRef.current = ''; + + // Format prompt for the instruct model + const prompt = `<|im_start|>user\n${text}<|im_end|>\n<|im_start|>assistant\n`; + + const generatePayload: GeneratePayload = { + prompt, + params: { + maxTokens: 256, + temperature: 0.7, + topP: 0.9, + }, + }; + + workerRef.current.postMessage({ type: 'generate', payload: generatePayload }); + }, + [status, isTyping] + ); + + const getStatusIndicatorClass = () 
=> { + switch (status) { + case 'loading': + return 'loading'; + case 'ready': + return 'ready'; + case 'error': + return 'error'; + default: + return ''; + } + }; + + return ( +
+
+

SmolLM2 in Browser

+

AI language model running entirely on your device via WebAssembly

+
+ +
+
+ {statusText} + {status === 'idle' && ( + + )} +
+ + {progress && ( +
+
+
+ )} + +
+ + + + ) : null + } + > + {messages.map((msg, index) => ( + + ))} + + + + +
+ +

+ Using SmolLM2-135M-Instruct | No data sent to servers | All processing + happens locally +

+
+ ); +} + +export default App; diff --git a/web/src/index.css b/web/src/index.css new file mode 100644 index 0000000..71dce1b --- /dev/null +++ b/web/src/index.css @@ -0,0 +1,206 @@ +:root { + font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif; + line-height: 1.5; + font-weight: 400; + + color-scheme: light dark; + color: rgba(255, 255, 255, 0.87); + background-color: #1a1a1a; + + font-synthesis: none; + text-rendering: optimizeLegibility; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +html, body, #root { + height: 100%; + width: 100%; +} + +body { + margin: 0; + display: flex; + min-width: 320px; + min-height: 100vh; +} + +#root { + display: flex; + flex-direction: column; +} + +.app-container { + display: flex; + flex-direction: column; + height: 100%; + max-width: 1200px; + margin: 0 auto; + padding: 1rem; +} + +.header { + text-align: center; + padding: 1rem 0; + border-bottom: 1px solid #333; + margin-bottom: 1rem; +} + +.header h1 { + font-size: 1.5rem; + margin-bottom: 0.5rem; + color: #61dafb; +} + +.header p { + font-size: 0.875rem; + color: #888; +} + +.status-bar { + display: flex; + align-items: center; + justify-content: center; + gap: 0.5rem; + padding: 0.75rem; + background: #252525; + border-radius: 0.5rem; + margin-bottom: 1rem; + font-size: 0.875rem; +} + +.status-indicator { + width: 10px; + height: 10px; + border-radius: 50%; + animation: pulse 2s infinite; +} + +.status-indicator.loading { + background-color: #f0ad4e; +} + +.status-indicator.ready { + background-color: #5cb85c; + animation: none; +} + +.status-indicator.error { + background-color: #d9534f; + animation: none; +} + +@keyframes pulse { + 0% { opacity: 1; } + 50% { opacity: 0.5; } + 100% { opacity: 1; } +} + +.chat-container { + flex: 1; + display: flex; + flex-direction: column; + min-height: 0; + background: #252525; + border-radius: 0.5rem; + overflow: hidden; +} + 
+.progress-bar { + width: 100%; + height: 4px; + background: #333; + border-radius: 2px; + overflow: hidden; + margin-top: 0.5rem; +} + +.progress-bar-fill { + height: 100%; + background: linear-gradient(90deg, #61dafb, #21a1f1); + transition: width 0.3s ease; +} + +.load-button { + background: linear-gradient(135deg, #61dafb, #21a1f1); + color: #1a1a1a; + border: none; + padding: 0.75rem 1.5rem; + border-radius: 0.5rem; + font-size: 1rem; + font-weight: 600; + cursor: pointer; + transition: transform 0.2s, box-shadow 0.2s; + margin: 0.5rem 0; +} + +.load-button:hover:not(:disabled) { + transform: translateY(-2px); + box-shadow: 0 4px 12px rgba(97, 218, 251, 0.3); +} + +.load-button:disabled { + opacity: 0.6; + cursor: not-allowed; +} + +.model-info { + font-size: 0.75rem; + color: #666; + text-align: center; + margin-top: 0.5rem; +} + +/* Override chatscope styles for dark theme */ +.cs-main-container { + background: #252525 !important; + border: none !important; +} + +.cs-chat-container { + background: #252525 !important; +} + +.cs-message-list { + background: #252525 !important; +} + +.cs-message__content { + background: #333 !important; + color: #fff !important; +} + +.cs-message--incoming .cs-message__content { + background: #2d4a5e !important; +} + +.cs-message--outgoing .cs-message__content { + background: #4a5e2d !important; +} + +.cs-message-input { + background: #333 !important; + border-top: 1px solid #444 !important; +} + +.cs-message-input__content-editor-wrapper { + background: #1a1a1a !important; +} + +.cs-message-input__content-editor { + color: #fff !important; +} + +.cs-button--send { + color: #61dafb !important; +} + +.cs-typing-indicator__dot { + background-color: #61dafb !important; +} diff --git a/web/src/main.tsx b/web/src/main.tsx new file mode 100644 index 0000000..2339d59 --- /dev/null +++ b/web/src/main.tsx @@ -0,0 +1,10 @@ +import React from 'react'; +import ReactDOM from 'react-dom/client'; +import App from './App'; +import 
'./index.css'; + +ReactDOM.createRoot(document.getElementById('root')!).render( + + + +); diff --git a/web/src/worker.ts b/web/src/worker.ts new file mode 100644 index 0000000..02889ff --- /dev/null +++ b/web/src/worker.ts @@ -0,0 +1,272 @@ +/** + * SmolLM2 Web Worker for background model inference. + * + * This worker handles model loading and text generation in the background, + * keeping the main UI thread responsive during inference. + */ + +// Message types for worker communication +export interface WorkerMessage { + type: + | 'init' + | 'load' + | 'generate' + | 'clear' + | 'status' + | 'token' + | 'complete' + | 'error' + | 'progress'; + payload?: unknown; +} + +export interface LoadPayload { + modelUrl: string; + tokenizerUrl: string; + configUrl: string; +} + +export interface GeneratePayload { + prompt: string; + params?: GenerationParams; +} + +export interface GenerationParams { + maxTokens?: number; + temperature?: number; + topP?: number; + repeatPenalty?: number; + repeatLastN?: number; + seed?: number; +} + +// WASM module interface +interface SmolLM2Wasm { + default: (input?: RequestInfo | URL) => Promise; + init_panic_hook: () => void; + get_version: () => string; + is_model_loaded: () => boolean; + load_model: ( + weights: Uint8Array, + tokenizer: string, + config: string + ) => Promise; + generate: ( + prompt: string, + paramsJson: string, + callback: (token: string) => void + ) => Promise; + clear_model: () => void; +} + +let wasm: SmolLM2Wasm | null = null; + +/** + * Post a message to the main thread. + */ +function postMessage(message: WorkerMessage): void { + self.postMessage(message); +} + +/** + * Fetch a file as an ArrayBuffer with progress tracking. 
+ */ +async function fetchWithProgress( + url: string, + label: string +): Promise { + const response = await fetch(url); + + if (!response.ok) { + throw new Error(`Failed to fetch ${label}: ${response.statusText}`); + } + + const contentLength = response.headers.get('Content-Length'); + const total = contentLength ? parseInt(contentLength, 10) : 0; + + if (!response.body) { + return response.arrayBuffer(); + } + + const reader = response.body.getReader(); + const chunks: Uint8Array[] = []; + let loaded = 0; + + while (true) { + const { done, value } = await reader.read(); + + if (done) break; + + chunks.push(value); + loaded += value.length; + + if (total > 0) { + const progress = (loaded / total) * 100; + postMessage({ + type: 'progress', + payload: { label, loaded, total, progress }, + }); + } + } + + // Combine chunks into single buffer + const result = new Uint8Array(loaded); + let offset = 0; + for (const chunk of chunks) { + result.set(chunk, offset); + offset += chunk.length; + } + + return result.buffer; +} + +/** + * Initialize the WASM module. + */ +async function initWasm(): Promise { + if (wasm) return; + + try { + postMessage({ type: 'status', payload: 'Initializing WASM module...' }); + + // Dynamic import of the WASM module + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const wasmModule = (await import('./pkg/smollm2_wasm.js')) as unknown as SmolLM2Wasm; + await wasmModule.default(); + wasmModule.init_panic_hook(); + + wasm = wasmModule; + + const version = wasm.get_version(); + postMessage({ + type: 'status', + payload: `WASM module initialized (v${version})`, + }); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new Error(`Failed to initialize WASM: ${message}`); + } +} + +/** + * Load the SmolLM2 model. 
+ */ +async function loadModel(payload: LoadPayload): Promise { + await initWasm(); + + if (!wasm) { + throw new Error('WASM module not initialized'); + } + + postMessage({ type: 'status', payload: 'Downloading model files...' }); + + // Fetch all required files + const [weightsBuffer, tokenizerResponse, configResponse] = await Promise.all([ + fetchWithProgress(payload.modelUrl, 'Model weights'), + fetch(payload.tokenizerUrl), + fetch(payload.configUrl), + ]); + + if (!tokenizerResponse.ok) { + throw new Error(`Failed to fetch tokenizer: ${tokenizerResponse.statusText}`); + } + if (!configResponse.ok) { + throw new Error(`Failed to fetch config: ${configResponse.statusText}`); + } + + const tokenizerJson = await tokenizerResponse.text(); + const configJson = await configResponse.text(); + + postMessage({ type: 'status', payload: 'Loading model into memory...' }); + + // Load the model + await wasm.load_model( + new Uint8Array(weightsBuffer), + tokenizerJson, + configJson + ); + + postMessage({ type: 'status', payload: 'Model loaded successfully!' }); + postMessage({ type: 'complete', payload: { action: 'load' } }); +} + +/** + * Generate text from a prompt. + */ +async function generateText(payload: GeneratePayload): Promise { + if (!wasm || !wasm.is_model_loaded()) { + throw new Error('Model not loaded'); + } + + const params = { + max_tokens: payload.params?.maxTokens ?? 256, + temperature: payload.params?.temperature ?? 0.7, + top_p: payload.params?.topP ?? 0.9, + repeat_penalty: payload.params?.repeatPenalty ?? 1.1, + repeat_last_n: payload.params?.repeatLastN ?? 64, + seed: payload.params?.seed ?? Math.floor(Math.random() * 1000000), + }; + + postMessage({ type: 'status', payload: 'Generating response...' 
}); + + // Callback for streaming tokens + const tokenCallback = (token: string): void => { + postMessage({ type: 'token', payload: token }); + }; + + const fullText = await wasm.generate( + payload.prompt, + JSON.stringify(params), + tokenCallback + ); + + postMessage({ type: 'complete', payload: { action: 'generate', text: fullText } }); +} + +/** + * Clear the model from memory. + */ +function clearModel(): void { + if (wasm) { + wasm.clear_model(); + postMessage({ type: 'status', payload: 'Model cleared from memory' }); + postMessage({ type: 'complete', payload: { action: 'clear' } }); + } +} + +/** + * Handle incoming messages from the main thread. + */ +self.onmessage = async (event: MessageEvent): Promise => { + const { type, payload } = event.data; + + try { + switch (type) { + case 'init': + await initWasm(); + break; + + case 'load': + await loadModel(payload as LoadPayload); + break; + + case 'generate': + await generateText(payload as GeneratePayload); + break; + + case 'clear': + clearModel(); + break; + + default: + console.warn('Unknown message type:', type); + } + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + console.error('Worker error:', message); + postMessage({ type: 'error', payload: message }); + } +}; + +// Signal that the worker is ready +postMessage({ type: 'status', payload: 'Worker initialized' }); diff --git a/web/tsconfig.json b/web/tsconfig.json new file mode 100644 index 0000000..65a5d7b --- /dev/null +++ b/web/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "target": "ES2020", + "useDefineForClassFields": true, + "lib": ["ES2020", "DOM", "DOM.Iterable", "WebWorker"], + "module": "ESNext", + "skipLibCheck": true, + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "resolveJsonModule": true, + "isolatedModules": true, + "noEmit": true, + "jsx": "react-jsx", + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noFallthroughCasesInSwitch": true + }, + "include": ["src"], + "references": [{ "path": "./tsconfig.node.json" }] +} diff --git a/web/tsconfig.node.json b/web/tsconfig.node.json new file mode 100644 index 0000000..97ede7e --- /dev/null +++ b/web/tsconfig.node.json @@ -0,0 +1,11 @@ +{ + "compilerOptions": { + "composite": true, + "skipLibCheck": true, + "module": "ESNext", + "moduleResolution": "bundler", + "allowSyntheticDefaultImports": true, + "strict": true + }, + "include": ["vite.config.ts"] +} diff --git a/web/vite.config.ts b/web/vite.config.ts new file mode 100644 index 0000000..5be9fdf --- /dev/null +++ b/web/vite.config.ts @@ -0,0 +1,24 @@ +import { defineConfig } from 'vite'; +import react from '@vitejs/plugin-react'; + +// https://vitejs.dev/config/ +export default defineConfig({ + plugins: [react()], + base: './', + build: { + outDir: 'dist', + assetsDir: 'assets', + }, + server: { + headers: { + 'Cross-Origin-Opener-Policy': 'same-origin', + 'Cross-Origin-Embedder-Policy': 'require-corp', + }, + }, + optimizeDeps: { + exclude: ['smollm2-wasm'], + }, + worker: { + format: 'es', + }, +});