|
24 | 24 | ] |
25 | 25 | }, |
26 | 26 | { |
27 | | - "cell_type": "code", |
28 | | - "execution_count": null, |
29 | | - "id": "homeless-announcement", |
| 27 | + "cell_type": "markdown", |
| 28 | + "id": "acd07905", |
30 | 29 | "metadata": {}, |
31 | | - "outputs": [], |
32 | 30 | "source": [ |
33 | | - "%%bash\n", |
34 | | - "git clone https://github.com/npatta01/pytorch-serving-workshop.git -b main --depth 1" |
| 31 | + "we are cloning to tmp for jupyterhub setup \n" |
35 | 32 | ] |
36 | 33 | }, |
37 | 34 | { |
38 | 35 | "cell_type": "code", |
39 | 36 | "execution_count": null, |
40 | | - "id": "spectacular-scott", |
| 37 | + "id": "homeless-announcement", |
41 | 38 | "metadata": {}, |
42 | 39 | "outputs": [], |
43 | 40 | "source": [ |
44 | 41 | "%%bash\n", |
45 | | - "cd pytorch-serving-workshop\n", |
46 | | - "mkdir -p artifacts/dataset_processed/amazon\n", |
47 | | - "mkdir -p artifacts/dataset_processed/model\n", |
48 | | - "\n", |
49 | | - "\n", |
50 | | - "cd artifacts\n", |
51 | | - "\n", |
52 | 42 | "\n", |
53 | | - "# dataset\n", |
54 | | - "echo \"downloading dataset\"\n", |
55 | | - "wget https://github.com/npatta01/pytorch-serving-workshop/releases/download/v0.0.1/dataset_processed.zip\n", |
56 | | - "unzip dataset_processed.zip\n", |
57 | | - "\n", |
58 | | - "\n", |
59 | | - "# model trained on above dataset\n", |
60 | | - "echo \"downloading model\"\n", |
61 | | - "wget https://github.com/npatta01/pytorch-serving-workshop/releases/download/v0.0.1/model.zip \n", |
62 | | - "unzip model.zip\n" |
| 43 | + "mkdir -p /tmp/workshop\n", |
| 44 | + "cd /tmp/workshop\n", |
| 45 | + "git clone https://github.com/npatta01/pytorch-serving-workshop.git -b main --depth 1" |
63 | 46 | ] |
64 | 47 | }, |
65 | 48 | { |
|
77 | 60 | "metadata": {}, |
78 | 61 | "outputs": [], |
79 | 62 | "source": [ |
80 | | - "!pip install -r pytorch-serving-workshop/requirements.txt" |
| 63 | + "!pip install -r /tmp/workshop/pytorch-serving-workshop/requirements.txt" |
81 | 64 | ] |
82 | 65 | }, |
83 | 66 | { |
84 | | - "cell_type": "code", |
85 | | - "execution_count": null, |
86 | | - "id": "neither-shipping", |
| 67 | + "cell_type": "markdown", |
| 68 | + "id": "875c286c", |
87 | 69 | "metadata": {}, |
88 | | - "outputs": [], |
89 | | - "source": [] |
| 70 | + "source": [ |
| 71 | + "download processed dataset and model" |
| 72 | + ] |
90 | 73 | }, |
91 | 74 | { |
92 | 75 | "cell_type": "code", |
93 | 76 | "execution_count": null, |
94 | | - "id": "checked-priest", |
| 77 | + "id": "spectacular-scott", |
95 | 78 | "metadata": {}, |
96 | 79 | "outputs": [], |
97 | 80 | "source": [ |
98 | | - "!sudo apt-get install htop tmux -y" |
| 81 | + "%%bash\n", |
| 82 | + "\n", |
| 83 | + "cd /tmp/workshop\n", |
| 84 | + "\n", |
| 85 | + "cd pytorch-serving-workshop\n", |
| 86 | + "mkdir -p artifacts/dataset_processed/amazon\n", |
| 87 | + "mkdir -p artifacts/dataset_processed/model\n", |
| 88 | + "\n", |
| 89 | + "\n", |
| 90 | + "cd artifacts\n", |
| 91 | + "\n", |
| 92 | + "\n", |
| 93 | + "# dataset\n", |
| 94 | + "echo \"downloading dataset\"\n", |
| 95 | + "wget https://github.com/npatta01/pytorch-serving-workshop/releases/download/v0.0.1/dataset_processed.zip\n", |
| 96 | + "unzip dataset_processed.zip\n", |
| 97 | + "\n", |
| 98 | + "\n", |
| 99 | + "# model trained on above dataset\n", |
| 100 | + "echo \"downloading model\"\n", |
| 101 | + "wget https://github.com/npatta01/pytorch-serving-workshop/releases/download/v0.0.1/model.zip \n", |
| 102 | + "unzip model.zip\n" |
99 | 103 | ] |
100 | 104 | }, |
101 | 105 | { |
102 | 106 | "cell_type": "markdown", |
103 | | - "id": "immune-institution", |
| 107 | + "id": "0ce1ca35", |
104 | 108 | "metadata": {}, |
105 | 109 | "source": [ |
106 | | - "## Colabcode\n", |
107 | | - "Colab doesn't support multiple notebooks, so we use colabcode to setup vscode + ngrok" |
| 110 | + "Download transformer models" |
108 | 111 | ] |
109 | 112 | }, |
110 | 113 | { |
111 | 114 | "cell_type": "code", |
112 | 115 | "execution_count": null, |
113 | | - "id": "insured-navigator", |
| 116 | + "id": "9073adaa", |
114 | 117 | "metadata": {}, |
115 | 118 | "outputs": [], |
116 | 119 | "source": [ |
117 | | - "!pip install colabcode==0.3.0" |
| 120 | + "import transformers" |
118 | 121 | ] |
119 | 122 | }, |
120 | 123 | { |
121 | 124 | "cell_type": "code", |
122 | 125 | "execution_count": null, |
123 | | - "id": "capital-major", |
| 126 | + "id": "c66c5e14", |
124 | 127 | "metadata": {}, |
125 | 128 | "outputs": [], |
126 | 129 | "source": [ |
127 | | - "from colabcode import ColabCode\n" |
| 130 | + "for model_name in [\"bert-large-uncased\",\"bert-base-uncased\",\"distilbert-base-uncased\"]:\n", |
| 131 | + " model = transformers.AutoModelForSequenceClassification.from_pretrained(model_name)\n", |
| 132 | + "\n", |
| 133 | + " tokenizer = transformers.AutoTokenizer.from_pretrained(\n", |
| 134 | + " model_name\n", |
| 135 | + " )\n", |
| 136 | + "\n", |
| 137 | + " query = \"men shoes\"\n", |
| 138 | + " res = tokenizer.encode_plus(query, return_tensors=\"pt\", padding=\"max_length\", truncation=True)\n", |
| 139 | + "\n", |
| 140 | + " model_res = model(**res)\n" |
128 | 141 | ] |
129 | 142 | }, |
130 | 143 | { |
131 | 144 | "cell_type": "code", |
132 | 145 | "execution_count": null, |
133 | | - "id": "operational-spotlight", |
| 146 | + "id": "neither-shipping", |
134 | 147 | "metadata": {}, |
135 | 148 | "outputs": [], |
136 | 149 | "source": [ |
137 | | - "ColabCode(port=10000, mount_drive=False)" |
| 150 | + "!cp -r /tmp/workshop/ $HOME/workshop/" |
138 | 151 | ] |
139 | 152 | } |
140 | 153 | ], |
|
144 | 157 | "type": "gcloud", |
145 | 158 | "uri": "gcr.io/deeplearning-platform-release/rapids-gpu.0-18:m65" |
146 | 159 | }, |
147 | | - "kernelspec": { |
148 | | - "display_name": "Python [conda env:pytorch]", |
149 | | - "language": "python", |
150 | | - "name": "conda-env-pytorch-py" |
151 | | - }, |
152 | 160 | "language_info": { |
153 | 161 | "codemirror_mode": { |
154 | 162 | "name": "ipython", |
|