diff --git a/README.md b/README.md index b9d3ce2..6a7a09c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # BioGPT This repository contains the implementation of [BioGPT: Generative Pre-trained Transformer for Biomedical Text Generation and Mining](https://academic.oup.com/bib/advance-article/doi/10.1093/bib/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9), by Renqian Luo, Liai Sun, Yingce Xia, Tao Qin, Sheng Zhang, Hoifung Poon and Tie-Yan Liu. +# Run BioGPT on Google Cloud Platform using Pulumi + +*To run BioGPT on Google Cloud Platform using Pulumi follow the instructions [here](https://github.com/calufa/BioGPT-Kubernetes-Pulumi-GCP/tree/main/infra).* + # Requirements and Installation diff --git a/infra/.gitignore b/infra/.gitignore new file mode 100644 index 0000000..51b0887 --- /dev/null +++ b/infra/.gitignore @@ -0,0 +1,3 @@ +gcp.json +data +Pulumi.dev.yaml diff --git a/infra/.sample.env b/infra/.sample.env new file mode 100644 index 0000000..0048e50 --- /dev/null +++ b/infra/.sample.env @@ -0,0 +1,8 @@ +export NAME=biogpt +export PROJECT={PROJECT} # <-- replace +export REGION={REGION} # <-- replace +export NODE_COUNT={NODE_COUNT} # <-- replace +export MACHINE_TYPE={MACHINE_TYPE} # <-- replace +export REPLICAS={REPLICAS} # <-- replace +export PULUMI_CONFIG_PASSPHRASE={PULUMI_CONFIG_PASSPHRASE} # <-- replace +export GOOGLE_APPLICATION_CREDENTIALS=./gcp.json diff --git a/infra/Dockerfile b/infra/Dockerfile new file mode 100644 index 0000000..de89fe1 --- /dev/null +++ b/infra/Dockerfile @@ -0,0 +1,37 @@ +FROM pytorch/pytorch:1.12.0-cuda11.3-cudnn8-runtime + +RUN apt-get update && \ + apt-get install -y \ + git \ + gcc \ + build-essential \ + wget + +WORKDIR /app + +# Install Fairseq +RUN git clone https://github.com/pytorch/fairseq && \ + cd fairseq && \ + git checkout v0.12.0 && \ + pip install . 
# Fetch access token from credentials
def get_access_token():
    """Return an OAuth2 access token for the ambient Google credentials.

    Used as the GCR registry password when pushing the Docker image.
    Refreshes the credentials whenever they are not currently valid —
    ``creds.valid`` is False both when no token has been fetched yet AND
    when a previously fetched token has expired, whereas the old
    ``not creds.token`` check only caught the first case.
    """
    scopes = ["https://www.googleapis.com/auth/cloud-platform"]
    creds, _ = google.auth.default(scopes=scopes)

    if not creds.valid:
        creds.refresh(Request())

    return creds.token
# Fetch GKE engine versions once the image digest resolves, so the cluster
# is only created after the image push has succeeded.
def get_engine_versions(_digest):
    """Return available GKE versions for this project/region.

    The digest argument is only a sequencing handle (it forces this lookup
    to run after the image push); its value is intentionally unused.
    """
    return container.get_engine_versions(project=project, location=location)


# `Output.apply` hands the resolved digest straight to the callback; the
# previous `Output.all([...])` form wrapped it in a one-element list.
engine_versions = image.repo_digest.apply(get_engine_versions)
# Deploy the NVIDIA driver-installer DaemonSet into kube-system so GPU nodes
# install the driver before BioGPT pods are scheduled onto them.
# The spec mirrors Google's COS driver-installer manifest; sub-specs are
# extracted into named locals to keep the resource call readable.
_DS_APP_LABEL = {"k8s-app": "nvidia-driver-installer"}

# Host paths the installer needs mounted into its containers.
_ds_volumes = [
    {"name": "dev", "hostPath": {"path": "/dev"}},
    {
        "name": "vulkan-icd-mount",
        "hostPath": {"path": "/home/kubernetes/bin/nvidia/vulkan/icd.d"},
    },
    {
        "name": "nvidia-install-dir-host",
        "hostPath": {"path": "/home/kubernetes/bin/nvidia"},
    },
    {"name": "root-mount", "hostPath": {"path": "/"}},
    {"name": "cos-tools", "hostPath": {"path": "/var/lib/cos-tools"}},
    {"name": "nvidia-config", "hostPath": {"path": "/etc/nvidia"}},
]

# Init container 1: installs the NVIDIA driver onto the COS host.
_driver_installer_container = {
    "image": "cos-nvidia-installer:fixed",
    "imagePullPolicy": "Never",
    "name": "nvidia-driver-installer",
    "resources": {"requests": {"cpu": "150m"}},
    "securityContext": {"privileged": True},
    "env": [
        {"name": "NVIDIA_INSTALL_DIR_HOST", "value": "/home/kubernetes/bin/nvidia"},
        {"name": "NVIDIA_INSTALL_DIR_CONTAINER", "value": "/usr/local/nvidia"},
        {
            "name": "VULKAN_ICD_DIR_HOST",
            "value": "/home/kubernetes/bin/nvidia/vulkan/icd.d",
        },
        {"name": "VULKAN_ICD_DIR_CONTAINER", "value": "/etc/vulkan/icd.d"},
        {"name": "ROOT_MOUNT_DIR", "value": "/root"},
        {"name": "COS_TOOLS_DIR_HOST", "value": "/var/lib/cos-tools"},
        {"name": "COS_TOOLS_DIR_CONTAINER", "value": "/build/cos-tools"},
    ],
    "volumeMounts": [
        {"name": "nvidia-install-dir-host", "mountPath": "/usr/local/nvidia"},
        {"name": "vulkan-icd-mount", "mountPath": "/etc/vulkan/icd.d"},
        {"name": "dev", "mountPath": "/dev"},
        {"name": "root-mount", "mountPath": "/root"},
        {"name": "cos-tools", "mountPath": "/build/cos-tools"},
    ],
}

# Init container 2: partitions GPUs after the driver is in place.
_partition_gpus_container = {
    "image": "gcr.io/gke-release/nvidia-partition-gpu@sha256:c54fd003948fac687c2a93a55ea6e4d47ffbd641278a9191e75e822fe72471c2",
    "name": "partition-gpus",
    "env": [{"name": "LD_LIBRARY_PATH", "value": "/usr/local/nvidia/lib64"}],
    "resources": {"requests": {"cpu": "150m"}},
    "securityContext": {"privileged": True},
    "volumeMounts": [
        {"name": "nvidia-install-dir-host", "mountPath": "/usr/local/nvidia"},
        {"name": "dev", "mountPath": "/dev"},
        {"name": "nvidia-config", "mountPath": "/etc/nvidia"},
    ],
}

# Only run on GPU nodes that do not already carry a managed driver version.
_ds_node_affinity = {
    "nodeAffinity": {
        "requiredDuringSchedulingIgnoredDuringExecution": {
            "nodeSelectorTerms": [
                {
                    "matchExpressions": [
                        {
                            "key": "cloud.google.com/gke-accelerator",
                            "operator": "Exists",
                        },
                        {
                            "key": "cloud.google.com/gke-gpu-driver-version",
                            "operator": "DoesNotExist",
                        },
                    ]
                }
            ]
        }
    }
}

nvidia_gpu_device_plugin = DaemonSet(
    "nvidia-gpu-device-plugin",
    metadata={
        "name": "nvidia-driver-installer",
        "namespace": "kube-system",
        "labels": dict(_DS_APP_LABEL),
    },
    spec={
        "selector": {"matchLabels": dict(_DS_APP_LABEL)},
        "updateStrategy": {"type": "RollingUpdate"},
        "template": {
            "metadata": {
                "labels": {"name": "nvidia-driver-installer", **_DS_APP_LABEL}
            },
            "spec": {
                "priorityClassName": "system-node-critical",
                "affinity": _ds_node_affinity,
                "tolerations": [{"operator": "Exists"}],
                "hostNetwork": True,
                "hostPID": True,
                "volumes": _ds_volumes,
                "initContainers": [
                    _driver_installer_container,
                    _partition_gpus_container,
                ],
                # A pause container keeps the pod alive once init work is done.
                "containers": [
                    {"image": "gcr.io/google-containers/pause:2.0", "name": "pause"}
                ],
            },
        },
    },
    opts=pulumi.ResourceOptions(provider=cluster_provider),
)
#!/usr/bin/env bash
# Container entrypoint: fetch the pre-trained BioGPT checkpoint, then serve.
# `set -euo pipefail` aborts if the download or extraction fails, instead of
# starting the server against a missing/partial checkpoint as before.
set -euo pipefail

# -p makes the script idempotent if the container restarts with a volume.
mkdir -p checkpoints
cd checkpoints
wget -nc https://msramllasc.blob.core.windows.net/modelrelease/BioGPT/checkpoints/Pre-trained-BioGPT.tgz
tar -zxvf Pre-trained-BioGPT.tgz
cd ..
python server.py
Create a service account in Google Cloud Platform as follows: + + * Log in to the Google Cloud Console (console.cloud.google.com) + * Select the project in which you want to create a service account + * Click on the "IAM & Admin" option in the left-hand menu + * Click on "Service Accounts" in the left-hand menu + * Click the "Create Service Account" button + * Enter a name for the service account + * Select "Editor" role for the service account + * Select "Furnish a new private key" option and choose JSON + * Click "Create" to create the service account + * Once you have created the service account, you will be prompted to download the private key file + +2. Rename service account private key file to `gcp.json` and place it inside the `/infra` directory +3. Rename `.sample.env` to `.env` and edit its contents +4. Execute in your terminal `./start.sh` to: + + * Enable Google Cloud Services + * Build and push a Docker image to Google Container Registry + * Spin up a Kubernetes cluster running a K80 GPU + * Install NVIDIA driver into Kubernetes cluster + * Launch the BioGPT Kubernetes deployment + * Expose BioGPT to the public internet using a Kubernetes Service + +### How to use + +Once `./start.sh` finishes running it will output `load_balancer_ip`, for example: `load_balancer_ip: "34.172.48.137"`. Use the IP provided to query BioGPT. + +Parameters: +- text (required): The text you want to send as a query to BioGPT +- min_len (optional, default: 100): The minimum length of the generated response +- max_len_b (optional, default: 1024): The maximum length of the generated response +- beam (optional, default: 5) + +For example: `http://34.172.48.137/?text=Your_Query_Here&min_len=100&max_len_b=1024&beam=5` (the service listens on the default HTTP port 80). Replace `Your_Query_Here` with your desired query text, and adjust the values for `min_len`, `max_len_b`, and `beam` as needed. + +Remember to URL-encode the text parameter if it contains special characters or spaces. 
from flask import Flask, request, jsonify
from fairseq.models.transformer_lm import TransformerLanguageModel


app = Flask(__name__)

# Loaded lazily on the first request and reused afterwards.  The previous
# implementation re-read the checkpoint from disk and re-pushed it to the GPU
# on EVERY request, which dominated the latency of each query.
_model = None


def _get_model():
    """Load the pre-trained BioGPT checkpoint once and cache it on the GPU."""
    global _model
    if _model is None:
        model = TransformerLanguageModel.from_pretrained(
            "checkpoints/Pre-trained-BioGPT",
            "checkpoint.pt",
            "data",
            tokenizer="moses",
            bpe="fastbpe",
            bpe_codes="data/bpecodes",
        )
        model.cuda()
        _model = model
    return _model


@app.route("/", methods=["GET"])
def index():
    """Generate text from BioGPT.

    Query parameters:
        text (required): prompt to continue.
        min_len (optional, default 100): minimum generation length.
        max_len_b (optional, default 1024): maximum generation length.
        beam (optional, default 5): beam-search width.
    """
    # type=int coerces supplied values; the old code passed raw strings to
    # fairseq whenever a parameter was present in the query string.
    min_len = request.args.get("min_len", default=100, type=int)
    max_len_b = request.args.get("max_len_b", default=1024, type=int)
    beam = request.args.get("beam", default=5, type=int)
    text = request.args["text"]

    model = _get_model()
    src_tokens = model.encode(text)
    # Generation kwargs override the defaults baked in at load time, so the
    # cached model honours per-request min_len/max_len_b.
    generate = model.generate(
        [src_tokens], beam=beam, min_len=min_len, max_len_b=max_len_b
    )[0]
    outputs = model.decode(generate[0]["tokens"])
    return jsonify(outputs)


if __name__ == "__main__":
    # debug=True exposed the Werkzeug interactive debugger (arbitrary code
    # execution) on a public LoadBalancer — never enable it in production.
    app.run(host="0.0.0.0", port=80)
# Push stack configuration from the sourced .env into the dev stack, then
# deploy.  Every expansion is quoted so values containing spaces (machine
# types, passphrases) survive word splitting.
pulumi config set name "$NAME" --stack dev
pulumi config set project "$PROJECT" --stack dev
pulumi config set region "$REGION" --stack dev
pulumi config set node_count "$NODE_COUNT" --stack dev
pulumi config set machine_type "$MACHINE_TYPE" --stack dev
pulumi config set replicas "$REPLICAS" --stack dev
pulumi up --yes --stack dev