xorbitsai · OliverBryant · Oct 22, 2025 · Oct 22, 2025 · Oct 22, 2025 · Oct 22, 2025
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
@@ -73,14 +73,21 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ "ubuntu-latest", "macos-13", "windows-latest" ]
-        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
+        os: [ "ubuntu-latest", "macos-13", "windows-latest", "macos-latest" ]
+        python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
         module: [ "xinference" ]
         exclude:
-          - { os: macos-13, python-version: 3.10 }
-          - { os: macos-13, python-version: 3.11 }
+          - { os: macos-13, python-version: "3.10" }  # quoted: a bare 3.10 parses as the float 3.1
+          - { os: macos-13, python-version: "3.11" }
+          - { os: macos-13, python-version: "3.12" }
+          - { os: macos-13, python-version: "3.13" }
+          - { os: macos-latest, python-version: "3.9" }
+          - { os: macos-latest, python-version: "3.10" }
+          - { os: macos-latest, python-version: "3.11" }
+          - { os: macos-latest, python-version: "3.12" }
-          - { os: windows-latest, python-version: 3.10 }
-          - { os: windows-latest, python-version: 3.11 }
+          - { os: windows-latest, python-version: "3.10" }
+          - { os: windows-latest, python-version: "3.11" }
+          - { os: windows-latest, python-version: "3.12" }
         include:
           - { os: self-hosted, module: gpu, python-version: 3.9}
           - { os: macos-latest, module: metal, python-version: "3.10" }
@@ -99,15 +106,21 @@ jobs:
           python-version: ${{ matrix.python-version }}
           activate-environment: ${{ env.CONDA_ENV }}
 
-      # Important for python == 3.12
+      # Important for Python 3.12 and 3.13: upgrade pip and setuptools before the dependency installs below
       - name: Update pip and setuptools
-        if: ${{ matrix.python-version == '3.12' }}
+        if: ${{ matrix.python-version == '3.12' || matrix.python-version == '3.13' }}
         run: |
           python -m pip install -U pip setuptools
 
+      # Install torch up front for Python 3.13 (plain pip install from the default index, not a nightly build)
+      - name: Install torch for Python 3.13
+        if: ${{ matrix.python-version == '3.13' }}
+        run: |
+          python -m pip install torch torchvision torchaudio
+
       - name: Install numpy
         if: |
-          (startsWith(matrix.os, 'macos') && (matrix.python-version == '3.12' || matrix.python-version == '3.9')) || 
+          (startsWith(matrix.os, 'macos') && (matrix.python-version == '3.13' || matrix.python-version == '3.9')) ||
           (startsWith(matrix.os, 'windows') && matrix.python-version == '3.9')
         run: |
           python -m pip install "numpy<2"
@@ -139,7 +152,9 @@ jobs:
             pip install "transformers<4.49"
             pip install attrdict
             pip install "timm>=0.9.16"
-            pip install torch torchvision
+            if [ "${{ matrix.python-version }}" != "3.13" ]; then
+              pip install torch torchvision
+            fi
             pip install accelerate
             pip install sentencepiece
             pip install transformers_stream_generator
@@ -158,9 +173,21 @@ jobs:
           fi
         working-directory: .
 
+      # Free disk space on Ubuntu runners before the test step.
+      - name: Clean up disk
+        if: startsWith(matrix.os, 'ubuntu')
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo apt-get clean
+          sudo rm -rf /var/lib/apt/lists/*
+          df -h
+
       - name: Test with pytest
         env:
           MODULE: ${{ matrix.module }}
+          PYTORCH_MPS_HIGH_WATERMARK_RATIO: "1.0"  # quoted so YAML keeps it a string, not a float
         run: |
           if [ "$MODULE" == "gpu" ]; then
             ${{ env.SELF_HOST_PYTHON }} -m pip install -U -e ".[audio]"
@@ -296,6 +323,7 @@ jobs:
               --ignore xinference/model/llm/sglang \
               --ignore xinference/client/tests/test_client.py \
               --ignore xinference/client/tests/test_async_client.py \
+              --ignore xinference/model/llm/mlx \
               xinference
 
           fi

diff --git a/xinference/core/tests/test_metrics.py b/xinference/core/tests/test_metrics.py
@@ -124,6 +124,7 @@ async def test_disable_metrics_exporter_server(disable_metrics, setup_cluster):
         requests.get(metrics_exporter_address)
 
 
+@pytest.mark.timeout(300)  # 5 minutes timeout to prevent hanging in Python 3.13
 async def test_metrics_exporter_data(setup_cluster):
     endpoint, metrics_exporter_address, supervisor_address = setup_cluster
 

diff --git a/xinference/model/embedding/tests/test_embedding_models.py b/xinference/model/embedding/tests/test_embedding_models.py
@@ -222,11 +222,15 @@ def test_register_custom_embedding():
 
 
 def test_register_fault_embedding():
+    """``_install()`` must warn about a custom embedding spec with a bad URI."""
     from ....constants import XINFERENCE_MODEL_DIR
     from .. import _install
 
-    os.makedirs(os.path.join(XINFERENCE_MODEL_DIR, "v2", "embedding"), exist_ok=True)
-    file_path = os.path.join(XINFERENCE_MODEL_DIR, "v2", "embedding/GTE.json")
+    # Place the faulty spec under <model dir>/v2/embedding before calling _install().
+    embedding_dir = os.path.join(XINFERENCE_MODEL_DIR, "v2", "embedding")
+    os.makedirs(embedding_dir, exist_ok=True)
+    file_path = os.path.join(embedding_dir, "GTE.json")
+
     data = {
         "model_name": "GTE",
         "model_hub": "huggingface",
@@ -244,14 +248,30 @@ def test_register_fault_embedding():
         ],
     }
 
     with open(file_path, "w") as f:
         json.dump(data, f, indent=4)
 
-    with pytest.warns(UserWarning) as record:
-        _install()
-    assert any(
-        "Invalid model URI /new_data/cache/gte-Qwen2" in str(r.message) for r in record
-    )
+    try:
+        with pytest.warns(UserWarning) as record:
+            _install()
+    finally:
+        # Remove the faulty spec even when _install() raises or emits no warning,
+        # so it cannot leak into later tests.
+        if os.path.exists(file_path):
+            os.remove(file_path)
+
+    # The warning reads "{user_defined_embedding_dir}/{f} has error, {e}", where e is
+    # the ValueError text; that text may use either of the two wordings below.
+    messages = [str(w.message) for w in record]
+    assert any(
+        "has error" in message
+        and (
+            "Invalid model URI" in message
+            or "Model URI cannot be a relative path" in message
+        )
+        and "/new_data/cache/gte-Qwen2" in message
+        for message in messages
+    ), f"Expected warning about invalid model URI not found. Warnings: {messages}"
 
 
 def test_convert_ids_to_tokens():