lemonade-sdk
diff --git a/.github/workflows/build_and_release.yml b/.github/workflows/build_and_release.yml
Lines changed: 68 additions & 85 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 12 additions & 7 deletions
diff --git a/README.md b/README.md
Lines changed: 13 additions & 16 deletions
diff --git a/include/ryzenai/inference_engine.h b/include/ryzenai/inference_engine.h
Lines changed: 2 additions & 1 deletion
diff --git a/include/ryzenai/types.h b/include/ryzenai/types.h
Lines changed: 0 additions & 1 deletion
@@ -13,9 +13,9 @@ permissions:
   contents: write
 
 jobs:
-  build-ryzenai-server:
-    name: Build RyzenAI Server
-    runs-on: [rai-160-sdk, Windows]
+  build-and-test:
+    name: Build and Test RyzenAI Server
+    runs-on: [rai-170-sdk, Windows]
     steps:
       - uses: actions/checkout@v4
         with:
@@ -75,7 +75,7 @@ jobs:
           mkdir build
           cd build
           
-          # Configure - Ryzen AI should be at C:\Program Files\RyzenAI\1.6.0
+          # Configure - Ryzen AI should be at C:\Program Files\RyzenAI\1.7.0
           cmake .. -G "Visual Studio 17 2022" -A x64
           if ($LASTEXITCODE -ne 0) { 
               Write-Host "ERROR: CMake configuration failed!" -ForegroundColor Red
@@ -119,37 +119,6 @@ jobs:
           
           $fileCount = (Get-ChildItem $releaseDir -File | Measure-Object).Count
           Write-Host "`nFound $fileCount files in release directory" -ForegroundColor Green
-          Write-Host "Contents will be uploaded as artifact (GitHub will zip automatically)" -ForegroundColor Gray
-
-      - name: Upload RyzenAI Server Package
-        uses: actions/upload-artifact@v4
-        with:
-          name: ryzenai-server
-          path: |
-            build/bin/Release/*.exe
-            build/bin/Release/*.dll
-            build/bin/Release/*.pdb
-            build/bin/Release/AMD_LICENSE
-          retention-days: 7
-
-  test-ryzenai-server:
-    name: Test RyzenAI Server
-    needs: build-ryzenai-server
-    runs-on: [rai300_400, Windows]
-    strategy:
-      fail-fast: false
-      matrix:
-        mode: [cpu, npu, hybrid]
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          clean: true
-
-      - name: Download RyzenAI Server artifact
-        uses: actions/download-artifact@v4
-        with:
-          name: ryzenai-server
-          path: build/bin/Release
 
       - name: Set up Python
         uses: actions/setup-python@v5
@@ -164,92 +133,106 @@ jobs:
           python -m pip install -r test/requirements.txt
           Write-Host "Test dependencies installed!" -ForegroundColor Green
 
-      - name: Download model checkpoint
+      - name: Test all modes
         shell: PowerShell
         run: |
           $ErrorActionPreference = "Stop"
           
           # Set HF_HOME to local directory
           $env:HF_HOME = "${{ github.workspace }}/hf_home"
-          Write-Host "HF_HOME set to: $env:HF_HOME" -ForegroundColor Cyan
           
-          # Create directory if it doesn't exist
           if (-not (Test-Path $env:HF_HOME)) {
               New-Item -ItemType Directory -Path $env:HF_HOME -Force | Out-Null
           }
           
           # Model mapping based on mode
           $modelMap = @{
-              "npu" = "amd/Qwen2.5-3B-Instruct-onnx-ryzenai-npu"
-              "hybrid" = "amd/Qwen2.5-3B-Instruct-onnx-ryzenai-hybrid"
+              "npu" = "amd/Llama-3.2-1B-Instruct-onnx-ryzenai-npu"
+              "hybrid" = "amd/Qwen2.5-0.5B-Instruct-onnx-ryzenai-1.7-hybrid"
               "cpu" = "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx"
           }
           
-          $mode = "${{ matrix.mode }}"
-          $modelName = $modelMap[$mode]
-          
-          Write-Host "Downloading model for $mode mode: $modelName" -ForegroundColor Cyan
+          $modes = @("cpu", "npu", "hybrid")
+          $failed = @()
           
-          # Download the model using huggingface_hub
-          python -c @"
+          foreach ($mode in $modes) {
+              $modelName = $modelMap[$mode]
+              $logDir = "${{ github.workspace }}/test_logs"
+              
+              Write-Host "`n============================================================" -ForegroundColor Cyan
+              Write-Host "  Testing mode: $mode ($modelName)" -ForegroundColor Cyan
+              Write-Host "============================================================`n" -ForegroundColor Cyan
+              
+              # Download the model
+              Write-Host "Downloading model..." -ForegroundColor Cyan
+              python -c @"
           import os
           os.environ['HF_HOME'] = r'${{ github.workspace }}/hf_home'
-          
           from huggingface_hub import snapshot_download
-          
           model_name = '$modelName'
           print(f'Downloading {model_name}...')
-          
-          local_path = snapshot_download(
-              repo_id=model_name,
-              local_dir=None,  # Use default HF cache
-          )
-          
+          local_path = snapshot_download(repo_id=model_name, local_dir=None)
           print(f'Model downloaded to: {local_path}')
           "@
-          
-          if ($LASTEXITCODE -ne 0) {
-              Write-Host "ERROR: Failed to download model!" -ForegroundColor Red
-              exit $LASTEXITCODE
+              
+              if ($LASTEXITCODE -ne 0) {
+                  Write-Host "ERROR: Failed to download model for $mode!" -ForegroundColor Red
+                  $failed += $mode
+                  continue
+              }
+              
+              # Run tests
+              Write-Host "Running tests for $mode..." -ForegroundColor Cyan
+              python test/test_server.py --mode $mode --server-exe build/bin/Release/ryzenai-server.exe --log-dir $logDir
+              
+              if ($LASTEXITCODE -ne 0) {
+                  Write-Host "ERROR: Tests failed for mode $mode!" -ForegroundColor Red
+                  $failed += $mode
+              } else {
+                  Write-Host "All tests passed for mode: $mode" -ForegroundColor Green
+              }
           }
           
-          Write-Host "Model download complete!" -ForegroundColor Green
-
-      - name: Run verification tests
-        shell: PowerShell
-        run: |
-          $ErrorActionPreference = "Stop"
-          
-          # Set HF_HOME
-          $env:HF_HOME = "${{ github.workspace }}/hf_home"
-          
-          $mode = "${{ matrix.mode }}"
-          $logDir = "${{ github.workspace }}/test_logs"
-          
-          Write-Host "Running verification tests for mode: $mode" -ForegroundColor Cyan
-          Write-Host "Log directory: $logDir" -ForegroundColor Gray
-          
-          # Run the test script with log directory
-          python test/test_server.py --mode $mode --server-exe build/bin/Release/ryzenai-server.exe --log-dir $logDir
-          
-          if ($LASTEXITCODE -ne 0) {
-              Write-Host "ERROR: Tests failed for mode $mode!" -ForegroundColor Red
-              exit $LASTEXITCODE
+          # Summary: print a PASSED/FAILED line for every mode, then fail the step if any mode failed
+          Write-Host "`n============================================================" -ForegroundColor Cyan
+          Write-Host "  Test Summary" -ForegroundColor Cyan
+          Write-Host "============================================================" -ForegroundColor Cyan
+          foreach ($mode in $modes) {
+              if ($failed -contains $mode) {
+                  Write-Host "  $mode : FAILED" -ForegroundColor Red
+              } else {
+                  Write-Host "  $mode : PASSED" -ForegroundColor Green
+              }
           }
+          Write-Host "============================================================`n" -ForegroundColor Cyan
           
-          Write-Host "All tests passed for mode: $mode" -ForegroundColor Green
+          if ($failed.Count -gt 0) {
+              Write-Host "ERROR: $($failed.Count) mode(s) failed: $($failed -join ', ')" -ForegroundColor Red
+              exit 1
+          }
 
       - name: Upload server logs on failure
         if: failure()
         uses: actions/upload-artifact@v4
         with:
-          name: server-logs-${{ matrix.mode }}
+          name: server-logs
           path: ${{ github.workspace }}/test_logs/
           retention-days: 7
 
+      - name: Upload RyzenAI Server Package
+        uses: actions/upload-artifact@v4
+        with:
+          name: ryzenai-server
+          path: |
+            build/bin/Release/*.exe
+            build/bin/Release/*.dll
+            build/bin/Release/*.pdb
+            build/bin/Release/AMD_LICENSE
+          retention-days: 7
+
   create-release:
     name: Create GitHub Release
-    needs: [build-ryzenai-server, test-ryzenai-server]
+    needs: build-and-test
     runs-on: ubuntu-latest
     if: startsWith(github.ref, 'refs/tags/v')
     steps:
@@ -295,12 +278,12 @@ jobs:
             
             - Windows 11 (64-bit)
             - AMD Ryzen AI 300-series processor
-            - Ryzen AI Software 1.6.0 with LLM patch
+            - Ryzen AI Software 1.7.0
             
             ### Usage
             
             ```cmd
-            ryzenai-server.exe -m C:\path\to\onnx\model --mode hybrid
+            ryzenai-server.exe -m C:\path\to\onnx\model
             ```
             
             See the [README](https://github.com/lemonade-sdk/ryzenai-server#readme) for full documentation.
 
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.20)
-project(ryzenai-server VERSION 1.0.2 LANGUAGES CXX)
+project(ryzenai-server VERSION 1.7.0 LANGUAGES CXX)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -17,10 +17,10 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
 find_package(Threads REQUIRED)
 
 # ONNX Runtime GenAI
-set(OGA_ROOT "C:/Program Files/RyzenAI/1.6.0" CACHE PATH "Path to Ryzen AI installation")
+set(OGA_ROOT "C:/Program Files/RyzenAI/1.7.0" CACHE PATH "Path to Ryzen AI installation")
 
 if(NOT EXISTS ${OGA_ROOT})
-    message(FATAL_ERROR "Ryzen AI not found at ${OGA_ROOT}. Please install Ryzen AI 1.6.0 or set OGA_ROOT")
+    message(FATAL_ERROR "Ryzen AI not found at ${OGA_ROOT}. Please install Ryzen AI 1.7.0 or set OGA_ROOT")
 endif()
 
 message(STATUS "Using Ryzen AI from: ${OGA_ROOT}")
@@ -95,8 +95,11 @@ target_link_libraries(ryzenai-server PRIVATE
     Threads::Threads
 )
 
-# Enable multi-threading for cpp-httplib
-target_compile_definitions(ryzenai-server PRIVATE CPPHTTPLIB_THREAD_POOL_COUNT=8)
+# Compile definitions: cpp-httplib thread pool size, plus the server version string taken from project(VERSION)
+target_compile_definitions(ryzenai-server PRIVATE
+    CPPHTTPLIB_THREAD_POOL_COUNT=8
+    RYZENAI_SERVER_VERSION="${PROJECT_VERSION}"
+)
 
 # Windows-specific settings
 if(WIN32)
@@ -110,13 +113,15 @@ if(WIN32)
         "${OGA_ROOT}/deployment/onnxruntime.dll"
         "${OGA_ROOT}/deployment/onnxruntime_providers_shared.dll"
         "${OGA_ROOT}/deployment/onnxruntime_providers_vitisai.dll"
+        "${OGA_ROOT}/deployment/onnxruntime_providers_ryzenai.dll"
         "${OGA_ROOT}/deployment/onnxruntime_vitisai_ep.dll"
         "${OGA_ROOT}/deployment/onnxruntime_vitis_ai_custom_ops.dll"
         "${OGA_ROOT}/deployment/onnx_custom_ops.dll"
-        "${OGA_ROOT}/deployment/abseil_dll.dll"
-        "${OGA_ROOT}/deployment/libutf8_validity.dll"
         "${OGA_ROOT}/deployment/dyn_dispatch_core.dll"
         "${OGA_ROOT}/deployment/DirectML.dll"
+        "${OGA_ROOT}/deployment/D3D12Core.dll"
+        "${OGA_ROOT}/deployment/flexmlrt.dll"
+        "${OGA_ROOT}/deployment/cert_dtrace.dll"
         "${OGA_ROOT}/deployment/zlib.dll"
         "${OGA_ROOT}/deployment/zstd.dll"
         "${OGA_ROOT}/deployment/ryzenai_onnx_utils.dll"
 
@@ -24,9 +24,8 @@ This server enables running Large Language Models on AMD Ryzen AI 300-series pro
 - Windows 11 (64-bit)
 - Visual Studio 2022
 - CMake 3.20 or higher
-- **Ryzen AI Software 1.6.0** with LLM patch
-  - Base installation must be at `C:\Program Files\RyzenAI\1.6.0`
-  - LLM patch must be applied on top of base installation
+- **Ryzen AI Software 1.7.0**
+  - Default installation path: `C:\Program Files\RyzenAI\1.7.0`
   - Download from: https://ryzenai.docs.amd.com
 
 **Hardware Requirements:**
@@ -67,7 +66,7 @@ All necessary Ryzen AI DLLs are automatically copied to the output directory dur
 If Ryzen AI is installed in a custom location:
 
 ```cmd
-cmake .. -G "Visual Studio 17 2022" -A x64 -DOGA_ROOT="C:\custom\path\to\RyzenAI\1.6.0"
+cmake .. -G "Visual Studio 17 2022" -A x64 -DOGA_ROOT="C:\custom\path\to\RyzenAI\1.7.0"
 ```
 
 ## Code Structure
@@ -133,7 +132,7 @@ ryzenai-server/
 
 **Inference Engine:** Wraps ONNX Runtime GenAI API, managing model loading, generation parameters, and streaming callbacks. Applies chat templates and handles tool call extraction.
 
-**Execution Providers:** Supports three modes:
+**Execution Providers:** Supports three modes (auto-detected from model config):
 - **Hybrid**: NPU + iGPU
 - **NPU**: Pure NPU execution
 - **CPU**: CPU-only fallback
@@ -153,14 +152,11 @@ These dependencies must be manually installed by the developer:
 ### Starting the Server
 
 ```cmd
-# Specify NPU mode
-ryzenai-server.exe -m C:\path\to\onnx\model --mode npu
+# Start the server (execution mode is auto-detected from the model)
+ryzenai-server.exe -m C:\path\to\onnx\model
 
-# Hybrid mode with custom port
-ryzenai-server.exe -m C:\path\to\onnx\model --mode hybrid --port 8081
-
-# CPU mode
-ryzenai-server.exe -m C:\path\to\onnx\model --mode cpu
+# Custom port
+ryzenai-server.exe -m C:\path\to\onnx\model --port 8081
 
 # Verbose logging
 ryzenai-server.exe -m C:\path\to\onnx\model --verbose
@@ -171,12 +167,13 @@ ryzenai-server.exe -m C:\path\to\onnx\model --verbose
 - `-m, --model PATH` - Path to ONNX model directory (required)
 - `--host ADDRESS` - Server host address (default: 127.0.0.1)
 - `-p, --port PORT` - Server port (default: 8080)
-- `--mode MODE` - Execution mode: npu, hybrid, cpu (default: hybrid)
 - `-c, --ctx-size SIZE` - Context size in tokens (default: 2048)
 - `-t, --threads NUM` - Number of CPU threads (default: 4)
 - `-v, --verbose` - Enable verbose logging
 - `-h, --help` - Show help message
 
+The execution mode (NPU, Hybrid, or CPU) is detected automatically from the model's `genai_config.json` file; there is no `--mode` option.
+
 ### Model Requirements
 
 Models must be in ONNX format compatible with Ryzen AI. Required files:
@@ -206,7 +203,7 @@ Returns server status and Ryzen AI-specific information:
   "model": "phi-3-mini-4k-instruct",
   "execution_mode": "hybrid",
   "max_prompt_length": 4096,
-  "ryzenai_version": "1.6.0"
+  "ryzenai_version": "1.7.0"
 }
 ```
 
@@ -269,7 +266,7 @@ print(response.choices[0].message.content)
 
 **Check:**
 1. Model path is correct and contains required ONNX files
-2. Ryzen AI 1.6.0 is installed at the correct path
+2. Ryzen AI 1.7.0 is installed at the correct path
 3. NPU drivers are up to date (Windows Update)
 4. Model is compatible with your Ryzen AI version
 
@@ -278,7 +275,7 @@ print(response.choices[0].message.content)
 All required DLLs should be automatically copied during build. If you get DLL errors:
 1. Verify Ryzen AI is installed correctly
 2. Rebuild with `cmake --build . --config Release`
-3. Manually copy DLLs from `C:\Program Files\RyzenAI\1.6.0\deployment\` to the executable directory
+3. Manually copy DLLs from `C:\Program Files\RyzenAI\1.7.0\deployment\` to the executable directory
 
 ### Port Already in Use
 
 
@@ -25,7 +25,7 @@ struct CompletionTimingData {
 
 class InferenceEngine {
 public:
-    InferenceEngine(const std::string& model_path, const std::string& mode);
+    InferenceEngine(const std::string& model_path);
     ~InferenceEngine();
 
     // Synchronous completion
@@ -57,6 +57,7 @@ class InferenceEngine {
     void setupExecutionProvider();
     void loadRaiConfig();
     std::string detectRyzenAIVersion();
+    std::string detectExecutionMode();
     std::string resolveModelPath(const std::string& path);
     std::vector<int32_t> truncatePrompt(const std::vector<int32_t>& input_ids);
     bool validateModelDirectory(const std::string& path);
 
@@ -13,7 +13,6 @@ struct CommandLineArgs {
     std::string model_path;           // -m, --model (required)
     std::string host = "127.0.0.1";   // --host
     int port = 8080;                  // --port
-    std::string mode = "hybrid";      // --mode (npu|hybrid|cpu)
     int ctx_size = 2048;              // --ctx-size
     int threads = 4;                  // --threads
     bool verbose = false;             // --verbose