deepjavalibrary · ethnzhng · Oct 30, 2025 · Oct 27, 2025 · Oct 30, 2025
@@ -17,7 +17,7 @@ Please delete options that are not relevant.
 ## Checklist:
 - [ ] Please add the link of [**Integration Tests Executor** run](https://github.com/deepjavalibrary/djl-serving/actions/workflows/integration_execute.yml) with related tests.
 - [ ] Have you [manually built the docker image](https://github.com/deepjavalibrary/djl-serving/blob/master/serving/docker/README.md#build-docker-image) and verify the change?
-- [ ] Have you run related tests? Check [how to set up the test environment here](https://github.com/deepjavalibrary/djl-serving/blob/master/.github/workflows/integration_execute.yml#L72); One example would be `pytest tests.py -k "TestCorrectnessLmiDist"  -m "lmi_dist"`
+- [ ] Have you run related tests? Check [how to set up the test environment here](https://github.com/deepjavalibrary/djl-serving/blob/master/.github/workflows/integration_execute.yml#L72)
 - [ ] Have you added tests that prove your fix is effective or that this feature works?
 - [ ] Has code been commented, particularly in hard-to-understand areas?
 - [ ] Have you made corresponding changes to the documentation?

@@ -29,83 +29,6 @@ jobs:
     outputs:
       p4d_instance_id: ${{ steps.create_gpu_p4d.outputs.action_lmic_p4d_instance_id }}
 
-  lmi-dist-test:
-    if: contains(fromJson('["", "aiccl"]'), github.event.inputs.run_test)
-    runs-on: [ self-hosted, p4d ]
-    timeout-minutes: 120
-    needs: create-runners-p4d
-    steps:
-      - uses: actions/checkout@v4
-      - name: Clean env
-        run: |
-          yes | docker system prune -a --volumes
-          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
-          echo "wait dpkg lock..."
-          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
-      - name: Set up Python3
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.10.x'
-      - name: Install pip dependencies
-        run: pip3 install pytest requests "numpy<2" pillow huggingface_hub tqdm
-      - name: Build container name
-        run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
-      - name: Download models and dockers
-        working-directory: tests/integration
-        run: |
-          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
-      - name: Test Mixtral-8x7B
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py lmi_dist_aiccl mixtral-8x7b-aiccl
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
-          serve
-          python3 llm/client.py lmi_dist_aiccl mixtral-8x7b-aiccl
-          ./remove_container.sh
-      - name: Test Llama-2-70B
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py lmi_dist_aiccl llama-2-70b-aiccl
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
-          serve
-          python3 llm/client.py lmi_dist_aiccl llama-2-70b-aiccl
-          ./remove_container.sh
-      - name: Test codellama/CodeLlama-34b-hf
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py lmi_dist_aiccl codellama-34b-aiccl
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
-          serve
-          python3 llm/client.py lmi_dist_aiccl codellama-34b-aiccl
-          ./remove_container.sh
-      - name: Test tiiuae/falcon-40b
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py lmi_dist_aiccl falcon-40b-aiccl
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
-          serve
-          python3 llm/client.py lmi_dist_aiccl falcon-40b-aiccl
-          ./remove_container.sh
-      - name: Remove models dir
-        working-directory: tests/integration
-        run: |
-          sudo rm -rf models
-      - name: On fail step
-        if: ${{ failure() }}
-        working-directory: tests/integration
-        run: |
-          sudo rm -rf models
-          ./remove_container.sh || true
-          cat logs/serving.log
-      - name: Upload test logs
-        uses: actions/upload-artifact@v4
-        with:
-          name: lmi-dist-aiccl-logs
-          path: tests/integration/logs/
 
   trtllm-test:
     runs-on: [ self-hosted, p4d ]
@@ -228,7 +151,7 @@ jobs:
   stop-runners-p4d:
     if: always()
     runs-on: [ self-hosted, scheduler ]
-    needs: [ create-runners-p4d, lmi-dist-test, trtllm-test, vllm-test ]
+    needs: [ create-runners-p4d, trtllm-test, vllm-test ]
     steps:
       - name: Stop all instances
         run: |

@@ -47,7 +47,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        engine: [lmi-dist, trtllm]
+        engine: [trtllm]
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python3

@@ -23,7 +23,7 @@ def is_chat_completions_request(inputs: Dict) -> bool:
 def parse_mistral_chat_request_inputs(messages, tokenizer):
     # TODO: get rid of this mess of an integration
     # Mistral has their own tokenizer with custom tokenization logic for chat type requests
-    # This dependency is only available in vllm/lmi-dist, so we import it here as necessary
+    # This dependency is only available in vllm, so we import it here as necessary
     from mistral_common.protocol.instruct.request import ChatCompletionRequest
     chat_request = ChatCompletionRequest(messages=messages)
     # The tokenized object contains the converted prompt, token ids, and images
@@ -76,8 +76,8 @@ def parse_chat_completions_request(
         images.extend(message.get_images())
 
     # Less than ideal, but need a working solution for now
-    # is_mistral_tokenizer can only be true if lmi-dist or vllm
-    # mistral tokenization only works with these engines if we pass token ids directly, not text.
+    # is_mistral_tokenizer can only be true when the engine is vllm.
+    # Mistral tokenization only works with this engine if we pass token ids directly, not text.
     # every other use case is designed for the actual string prompt being provided...
     if is_mistral_tokenizer:
         text_inputs = parse_mistral_chat_request_inputs(messages, tokenizer)

@@ -166,7 +166,7 @@ def set_adapter_class(self):
     def can_use_continuous_batching(self) -> bool:
         """
         Set configuration for continuous batching, currently all vllm implementations are continuous batching
-        and batch size greater than 1 for tnx and lmi-dist support rolling batch.
+        and tnx supports rolling batch when the batch size is greater than 1.
 
         :return: bool indicating if continuous batching can be used
         """

@@ -64,9 +64,9 @@ def set_quantize_for_backward_compatibility(self):
             self.quantize = "bitsandbytes8"
 
         # TODO remove this after refactor of all handlers
-        # parsing bitsandbytes8, so it can be directly passed to lmi dist model loader.
+        # Parse bitsandbytes8 so it can be passed directly to the vllm model loader.
         if self.quantize == "bitsandbytes8" \
-                and self.rolling_batch == RollingBatchEnum.lmidist:
+                and self.rolling_batch == RollingBatchEnum.vllm:
             self.quantize = "bitsandbytes"
         return self
 
@@ -123,9 +123,8 @@ def construct_kwargs_quantize(self):
             return self
 
         # TODO remove this after refactor of all handlers
-        # device map is not required for lmi dist and vllm
+        # device map is not required for vllm
         if self.rolling_batch in {
-                RollingBatchEnum.lmidist,
                 RollingBatchEnum.vllm,
         }:
             return self