diff --git a/Dockerfile b/Dockerfile
index 0e650bf..9ce6136 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,28 +1,62 @@
-FROM ubuntu:latest
+# Single-stage Dockerfile using a slim Python base
+FROM python:3.11-slim
 
-COPY . ./app
-WORKDIR /app
-
-RUN apt update; apt install -y wget lsb-release software-properties-common gnupg curl ca-certificates
-
-RUN wget -qO- https://apt.llvm.org/llvm.sh | bash -s -- 21
-RUN apt install -y libmlir-21-dev mlir-21-tools
+# Environment variables
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DEFAULT_TIMEOUT=100 \
+    NEXT_TELEMETRY_DISABLED=1
 
-RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
-RUN apt install -y nodejs
-RUN npm install
+ARG APP_ENV=production
+ENV NODE_ENV=$APP_ENV
 
-RUN add-apt-repository -y ppa:deadsnakes/ppa; apt install -y python3-pip python3.11-venv
-
-RUN python3.11 -m venv mlir_venv
+WORKDIR /app
 
-RUN mlir_venv/bin/pip install --upgrade pip
-RUN mlir_venv/bin/pip install --pre torch-mlir torchvision \
-    --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
-    -f https://github.com/llvm/torch-mlir-release/releases/expanded_assets/dev-wheels
+# Install minimal tooling
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    ca-certificates wget curl gnupg lsb-release software-properties-common && \
+    rm -rf /var/lib/apt/lists/*
 
-RUN mlir_venv/bin/pip install fastapi uvicorn pytest httpx
+# Install LLVM 21 (the script adds the apt repository and installs the toolchain)
+RUN wget -qO- https://apt.llvm.org/llvm.sh | bash -s -- 21
 
+# Add Node.js 20 repository and install runtime deps
+RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+    libmlir-21-dev mlir-21-tools nodejs && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy application code
+COPY --chown=10001:10001 . /app
+
+# Install JS dependencies, then install 'concurrently' globally
+RUN npm install && \
+    npm install -g concurrently
+
+# Create Python venv and install Python packages
+RUN python3 -m venv /opt/venv && \
+    /opt/venv/bin/pip install --upgrade pip setuptools wheel && \
+    /opt/venv/bin/pip install --pre torch-mlir torchvision \
+    --extra-index-url=https://download.pytorch.org/whl/nightly/cpu \
+    -f https://github.com/llvm/torch-mlir-release/releases/expanded_assets/dev-wheels && \
+    /opt/venv/bin/pip install fastapi uvicorn pytest httpx
+
+# Create non-root user and fix permissions
+RUN useradd -u 10001 -m --shell /usr/sbin/nologin appuser && \
+    mkdir -p /home/appuser/.cache && \
+    chown -R appuser:appuser /home/appuser/.cache /app
+USER appuser
+
+# Update PATH for venv and LLVM
+ENV PATH="/opt/venv/bin:/usr/lib/llvm-21/bin:$PATH"
+
+# Expose ports and add healthcheck
 EXPOSE 3000 8000
+HEALTHCHECK --interval=30s --timeout=5s --start-period=5s \
+  CMD curl -f http://localhost:8000/health || exit 1
 
-CMD ["npm", "run", "start:all"]
+# Default to interactive shell
+CMD ["/bin/sh"]
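Note on the `HEALTHCHECK`: it assumes the backend serves `GET /health` on port 8000. If `backend/server.py` does not already define such a route, a minimal FastAPI sketch would look like the following (the route name and body are assumptions for illustration, not the project's actual handler):

```python
from fastapi import FastAPI

app = FastAPI()

@app.get("/health")
def health() -> dict:
    # Liveness probe target for the container HEALTHCHECK above.
    return {"status": "ok"}
```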
diff --git a/README.md b/README.md
index ccd8c9d..25e7f17 100644
--- a/README.md
+++ b/README.md
@@ -47,49 +47,89 @@ Current version of the application is tested on Ubuntu 22.04 windows subsystem u
 
 ### Install dependencies
 
-In case of missing prerequisites here are some scripts to help set them up (runs on Debian and its derivatives).
-
+Clone the repository:
 ```bash
 git clone https://github.com/MrSidims/PytorchExplorer.git
 cd PytorchExplorer
+```
+
+Install frontend dependencies:
+```bash
 source setup_frontend.sh
 ```
 
-When you have venv suitable for `torch-mlir` work, install `fastapi`, `uvicorn` etc in venv like this:
+Set up the backend (Torch, MLIR, etc.):
+```bash
+source setup_backend.sh
+```
 
+If you already have a working venv for Torch-MLIR, you can just install FastAPI and testing dependencies:
 ```bash
 pip install fastapi uvicorn pytest httpx
 ```
 
-Otherwise here is the script to setup `torch`, `llvm` etc:
+To use custom builds of `torch-mlir-opt`, `mlir-opt`, etc. without placing them in your `$PATH`, configure the following environment variables:
+- `TORCH_MLIR_OPT_PATH`
+- `LLVM_BIN_PATH`
+- `TRITON_OPT_PATH`
 
+### Run the application
 
+#### Development mode (local)
 ```bash
-source setup_backend.sh
+npm run dev:all
 ```
+Then open http://localhost:3000/
 
-If you want to use your builds of the tools like `torch-mlir-opt`, `mlir-opt` etc without placing them in `PATH` please setup `TORCH_MLIR_OPT_PATH` and `LLVM_BIN_PATH` environment variables.
-
-### Run the application
-
+#### Production mode (local)
 ```bash
+npm run build
 npm run start:all
 ```
 
 Then open http://localhost:3000/ in your browser and enjoy!
 
-### Run in a docker
-
-Build image with:
+#### Run in a container (Docker or Podman)
+Build the image (set `APP_ENV` to `development` or `production`; the default is `production`):
 ```bash
-docker build -t pytorch_explorer .
+docker build -t pytorch_explorer --build-arg APP_ENV=development .
 ```
 
-Run it:
+Run the container in **production mode** (the image's default command is an interactive shell, so pass `-it`):
 ```bash
-docker run -p 3000:3000 -p 8000:8000 pytorch_explorer
+docker run -it -p 3000:3000 -p 8000:8000 pytorch_explorer
 ```
+Then inside the container:
+```bash
+npm run build
+npm run start:all
+```
+
+To run in **development mode**:
+```bash
+docker run -it --rm \
+  -e NODE_ENV=development \
+  -p 3000:3000 -p 8000:8000 \
+  pytorch_explorer
+```
+Then inside the container:
+```bash
+npm run dev:all
+```
+
+Secure run (for cases where you don't trust the samples under test):
+```bash
+podman run --rm -it \
+  --read-only \
+  --cap-drop=ALL \
+  --security-opt=no-new-privileges \
+  --tmpfs /app/.next:rw,size=256m \
+  -v stored_sessions:/app/StoredSessions:rw \
+  -p 8000:8000 -p 3000:3000 \
+  -e NODE_ENV=production \
+  pytorch_explorer
+```
 
 ### Run the tests
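Note on the "Run the tests" section: it relies on `pytest` and `httpx`, which both the Dockerfile and the setup scripts install. A minimal smoke test against the FastAPI app could look like the sketch below; the `/health` route is the same assumption as in the Dockerfile's `HEALTHCHECK`, so adjust the path and assertion to whatever endpoints `backend/server.py` actually exposes:

```python
# test_smoke.py -- hypothetical example, not part of the repository's test suite.
from fastapi.testclient import TestClient

from backend.server import app

def test_health_endpoint():
    # TestClient drives the ASGI app in-process via httpx; no running server needed.
    client = TestClient(app)
    response = client.get("/health")
    assert response.status_code == 200
```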
diff --git a/backend/server.py b/backend/server.py
index fee1f36..f6d75c2 100644
--- a/backend/server.py
+++ b/backend/server.py
@@ -121,7 +121,7 @@ def fix_input(input_obj):
 
 def split_cmd_arguments(cmd: str) -> List[str]:
     # Split the command string into arguments, handling quoted strings.
-    cmd_split = re.split(r''' (?=(?:[^'"]|'[^']*'|"[^"]*")*$)''', cmd.strip())
+    cmd_split = re.split(r""" (?=(?:[^'"]|'[^']*'|"[^"]*")*$)""", cmd.strip())
     # Remove quotes from each argument.
     cmd_split = [arg.replace('"', "").replace("'", "") for arg in cmd_split]
     return cmd_split
@@ -131,13 +131,21 @@ def split_cmd_arguments(cmd: str) -> List[str]:
 def run_external_opt_tool_file(
     input_path: str, cmd: str, tool: str, output_path: str
 ) -> Tuple[bool, str]:
+    args = [tool] + split_cmd_arguments(cmd) + [input_path, "-o", output_path]
     try:
-        args = [tool] + split_cmd_arguments(cmd) + [input_path, "-o", output_path]
-        result = subprocess.run(args, capture_output=True, text=True)
-        return (result.returncode == 0, result.stderr if result.stderr else "")
+        result = subprocess.run(args, capture_output=True, text=True, check=True)
+        return (True, result.stderr or "")
+    except subprocess.CalledProcessError as e:
+        logger.error(
+            f"Tool '{tool}' failed with return code {e.returncode}:\n{e.stderr}"
+        )
+        return (False, e.stderr or f"{tool} failed unexpectedly.")
+    except FileNotFoundError:
+        logger.error(f"Tool not found: {tool}", exc_info=True)
+        raise CompilerPipelineError(f"Compiler tool '{tool}' not found.")
     except Exception as e:
-        logger.error(f"Failed to run tool '{tool}': {e}", exc_info=True)
-        raise CompilerPipelineError(f"Failed to run compiler tool '{tool}' : {e}")
+        logger.error(f"Unexpected error running tool '{tool}': {e}", exc_info=True)
+        raise CompilerPipelineError(f"Unexpected error while running '{tool}': {e}")
 
 
 # Utility for custom pipeline.
@@ -296,31 +304,36 @@ def lower_to_llvm_mlir(model, example_input) -> str:
             f.flush()
             input_path = f.name
 
-        result = subprocess.run(
-            [
-                LLVM_BIN_PATH + "mlir-opt",
-                '--one-shot-bufferize="bufferize-function-boundaries"',
-                "-convert-linalg-to-loops",
-                "-convert-scf-to-cf",
-                "-convert-cf-to-llvm",
-                "-lower-affine",
-                "-finalize-memref-to-llvm",
-                "-convert-math-to-llvm",
-                "-convert-arith-to-llvm",
-                "-convert-func-to-llvm",
-                "-reconcile-unrealized-casts",
-                input_path,
-            ],
-            capture_output=True,
-            text=True,
-        )
-
-        os.remove(input_path)
-
-        if result.returncode != 0:
-            raise CompilerPipelineError(f"mlir-opt failed: {result.stderr}")
+        cmd = [
+            LLVM_BIN_PATH + "mlir-opt",
+            '--one-shot-bufferize="bufferize-function-boundaries"',
+            "-convert-linalg-to-loops",
+            "-convert-scf-to-cf",
+            "-convert-cf-to-llvm",
+            "-lower-affine",
+            "-finalize-memref-to-llvm",
+            "-convert-math-to-llvm",
+            "-convert-arith-to-llvm",
+            "-convert-func-to-llvm",
+            "-reconcile-unrealized-casts",
+            input_path,
+        ]
 
-        return result.stdout
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+            return result.stdout
+        except subprocess.CalledProcessError as e:
+            raise CompilerPipelineError(
+                f"mlir-opt failed with code {e.returncode}: {e.stderr}"
+            )
+        except FileNotFoundError:
+            raise CompilerPipelineError(f"'mlir-opt' not found at path: {cmd[0]}")
+        finally:
+            # Prevent tmp leaks
+            try:
+                os.remove(input_path)
+            except OSError:
+                pass
 
 
 # Generate LLVM MLIR.
@@ -351,19 +364,27 @@ def generate_llvm_ir(
             [LLVM_BIN_PATH + "mlir-translate", "--mlir-to-llvmir", input_path],
             capture_output=True,
             text=True,
+            check=True,
         )
 
-        os.remove(input_path)
-
-        if result.returncode != 0:
-            raise CompilerPipelineError(f"mlir-translate failed: {result.stderr}")
-
         llvm_ir = result.stdout
         return apply_optional_passes(llvm_ir, pipeline, dump_each)
+
+    except subprocess.CalledProcessError as e:
+        logger.error(f"mlir-translate failed: {e.stderr}", exc_info=True)
+        raise CompilerPipelineError(f"mlir-translate failed: {e.stderr}")
+
     except Exception as e:
         logger.exception("Error generating LLVM IR.")
        raise IRGenerationError("Failed to generate LLVM IR.")
+    finally:
+        # Prevent tmp leaks
+        try:
+            os.remove(input_path)
+        except OSError:
+            pass
 
 
 # Generate NVPTX, AMDGPU or SPIR-V.
 def generate_target_gpu_ir(model, example_input, target: str) -> str:
@@ -408,20 +429,31 @@ def compile_triton_ir(
             tmp_file.flush()
             tmp_path = tmp_file.name
 
-        result = subprocess.run(
-            ["python3", tmp_path],
-            capture_output=True,
-            text=True,
-            env=env,
-            timeout=20,
-        )
-
-        os.remove(tmp_path)
-
-        if result.returncode != 0:
+        try:
+            result = subprocess.run(
+                ["python3", tmp_path],
+                capture_output=True,
+                text=True,
+                env=env,
+                timeout=60,
+                check=True,
+            )
+        except subprocess.CalledProcessError as e:
             shutil.rmtree(cache_dir, ignore_errors=True)
-            logger.exception("User code execution failed.")
+            logger.error(
+                f"Triton code execution failed:\n{e.stderr}", exc_info=True
+            )
             raise TritonExecutionError("Triton code execution raised an exception.")
+        except subprocess.TimeoutExpired:
+            shutil.rmtree(cache_dir, ignore_errors=True)
+            logger.error("Triton code execution timed out.")
+            raise TritonExecutionError("Triton code execution timed out.")
+        finally:
+            # Prevent tmp leaks
+            try:
+                os.remove(tmp_path)
+            except OSError:
+                pass
 
         cached_triton_runs[code_hash] = {"cache_dir": cache_dir, "active_users": 0}
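Note: the `try`/`finally` cleanup added above now repeats in `lower_to_llvm_mlir`, `generate_llvm_ir`, and `compile_triton_ir`. If further deduplication is wanted, a small context manager could centralize the pattern. This is a sketch only; `temp_input_file` and the usage shown in the trailing comment are hypothetical, not part of this diff:

```python
import os
import tempfile
from contextlib import contextmanager

@contextmanager
def temp_input_file(suffix: str, contents: str):
    # Write contents to a named temp file and remove it on exit,
    # mirroring the "prevent tmp leaks" finally blocks above.
    f = tempfile.NamedTemporaryFile("w", suffix=suffix, delete=False)
    try:
        f.write(contents)
        f.flush()
        f.close()
        yield f.name
    finally:
        try:
            os.remove(f.name)
        except OSError:
            pass

# Hypothetical usage inside one of the lowering helpers:
#   with temp_input_file(".mlir", mlir_text) as input_path:
#       subprocess.run([mlir_opt, input_path, "-o", output_path], check=True)
```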
diff --git a/entrypoint.js b/entrypoint.js
new file mode 100644
index 0000000..88ff33d
--- /dev/null
+++ b/entrypoint.js
@@ -0,0 +1,11 @@
+const { execSync } = require("child_process");
+
+const mode = process.env.NODE_ENV;
+
+if (mode === "development") {
+  console.log("Running in development mode...");
+  execSync("npm run dev:all", { stdio: "inherit" });
+} else {
+  console.log("Running in production mode...");
+  execSync("npm run start:all", { stdio: "inherit" });
+}
diff --git a/package.json b/package.json
index 4ff04c5..278f0a0 100644
--- a/package.json
+++ b/package.json
@@ -3,11 +3,14 @@
   "version": "0.1.0",
   "private": true,
   "scripts": {
-    "dev:ui": "next dev",
+    "dev:ui": "next dev",
+    "start:ui": "next start -p 3000",
     "dev:api": "./mlir_venv/bin/python -m uvicorn backend.server:app --host 0.0.0.0 --port 8000 --reload",
-    "start:all": "concurrently \"npm run dev:ui\" \"npm run dev:api\"",
+    "start:api": "./mlir_venv/bin/python -m uvicorn backend.server:app --host 0.0.0.0 --port 8000",
+    "dev:all": "concurrently \"npm run dev:ui\" \"npm run dev:api\"",
+    "start:all": "concurrently \"npm run start:ui\" \"npm run start:api\"",
     "build": "next build",
-    "start": "next start",
+    "start": "node entrypoint.js",
     "lint": "next lint"
   },
   "dependencies": {
diff --git a/src/app/SessionContext.js b/src/app/SessionContext.js
index 05efa4b..9ac3ae8 100644
--- a/src/app/SessionContext.js
+++ b/src/app/SessionContext.js
@@ -29,7 +29,7 @@ class MyModel(nn.Module):
 model = MyModel()
 example_input = torch.randn(4, 4)
 # If you have multiple models, wrap each model and input tensor pair using:
-# __explore__(model, input_tensor)
+# __explore__(model, example_input)
 `,
   irWindows: [
     {
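A final side note on `backend/server.py`'s `split_cmd_arguments`: the quote-aware regex split is kept as-is by this diff, but the standard library's `shlex.split` performs very similar tokenization (whitespace splitting plus quote stripping) in a single call. Edge cases such as backslash escapes behave differently, so treat this as a possible simplification rather than a drop-in replacement:

```python
import shlex

# Example command string of the kind the backend splits into arguments.
cmd = '--mlir-print-ir-after-all --pass-pipeline="builtin.module(canonicalize)"'

# shlex splits on whitespace outside quotes and strips the quotes,
# which is what split_cmd_arguments implements by hand.
print(shlex.split(cmd))
# ['--mlir-print-ir-after-all', '--pass-pipeline=builtin.module(canonicalize)']
```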