Add debugging workflow (#7778)

DN6 · sayakpaul · web-flow · commit b1c5817a896f · 2024-04-29T13:44:39.000+05:30
add debug workflow

Co-authored-by: Sayak Paul &lt;spsayakpaul@gmail.com&gt;
diff --git a/.github/workflows/ssh-runner.yml b/.github/workflows/ssh-runner.yml
@@ -0,0 +1,55 @@
+name: SSH into runners
+
+on:
+  workflow_dispatch:
+    inputs:
+      runner_type:
+        description: 'Type of runner to test (a10 or t4)'
+        required: true
+      docker_image:
+        description: 'Name of the Docker image'
+        required: true
+
+env:
+  IS_GITHUB_CI: "1"
+  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+  HF_HOME: /mnt/cache
+  DIFFUSERS_IS_CI: yes
+  OMP_NUM_THREADS: 8
+  MKL_NUM_THREADS: 8
+  RUN_SLOW: yes
+
+jobs:
+  ssh_runner:
+    name: "SSH"
+    runs-on: [single-gpu, nvidia-gpu, "${{ github.event.inputs.runner_type }}", ci]
+    container:
+      image: ${{ github.event.inputs.docker_image }}
+      options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+
+    steps:
+      - name: Update clone
+        working-directory: /diffusers
+        run: |
+          git fetch && git checkout ${{ github.sha }}
+      - name: Cleanup
+        working-directory: /diffusers
+        run: |
+          rm -rf tests/__pycache__
+          rm -rf tests/models/__pycache__
+          rm -rf reports
+      - name: Show installed libraries and their versions
+        working-directory: /diffusers
+        run: pip freeze
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Tailscale # In order to be able to SSH when a test fails
+        uses: huggingface/tailscale-action@v1
+        with:
+          authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
+          slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
+          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+          waitForSSH: true