Skip to content

Commit 4cf01c6

Browse files
shubhammalhotra28 and Siddhesh2377
authored and committed
Add lora ios (#407)
* ios initial changes * minimal sample needed to test lora * updating docs * addressed the comments
1 parent 13aa8e6 commit 4cf01c6

File tree

12 files changed

+952
-5
lines changed

12 files changed

+952
-5
lines changed

examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,17 @@ struct RunAnywhereAIApp: App {
192192
memoryRequirement: 600_000_000
193193
)
194194
}
195+
// Qwen 2.5 1.5B - LoRA-compatible base model (has publicly available GGUF LoRA adapters)
196+
// TODO: [Portal Integration] Remove once portal delivers model + adapter pairings
197+
if let qwen15BURL = URL(string: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf") {
198+
RunAnywhere.registerModel(
199+
id: "qwen2.5-1.5b-instruct-q4_k_m",
200+
name: "Qwen 2.5 1.5B Instruct Q4_K_M",
201+
url: qwen15BURL,
202+
framework: .llamaCpp,
203+
memoryRequirement: 2_500_000_000
204+
)
205+
}
195206
if let lfm2Q4URL = URL(string: "https://huggingface.co/LiquidAI/LFM2-350M-GGUF/resolve/main/LFM2-350M-Q4_K_M.gguf") {
196207
RunAnywhere.registerModel(
197208
id: "lfm2-350m-q4_k_m",
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
//
2+
// DemoLoRAAdapter.swift
3+
// RunAnywhereAI
4+
//
5+
// TODO: [Portal Integration] Remove this entire file once adapters are delivered OTA from portal.
6+
//
7+
// =========================================================================================
8+
// LoRA Demo Integration Guide
9+
// =========================================================================================
10+
//
11+
// WHAT THIS IS
12+
// ------------
13+
// This file provides a temporary, hardcoded LoRA adapter catalog so we can verify that the
14+
// full LoRA pipeline works end-to-end on iOS: download adapter OTA -> apply to model -> generate.
15+
// Once the RunAnywhere portal delivers adapter catalogs via its API, this file should be deleted
16+
// and replaced with the portal-provided data.
17+
//
18+
//
19+
// WHY QWEN 2.5 1.5B WAS CHOSEN
20+
// -----------------------------
21+
// LoRA adapters are architecture-specific: an adapter trained on Model A cannot be used with
22+
// Model B, even if they're the same parameter count. We needed a base model + GGUF LoRA adapter
23+
// pair that is publicly available and proven to work with llama.cpp.
24+
//
25+
// - SmolLM2 360M: No GGUF LoRA adapters exist anywhere (no one has published a fine-tune).
26+
// - Qwen 2.5 0.5B: No matching adapter (smallest ggml-org adapter is for 1.5B).
27+
// - LFM2 350M: No LoRA adapters exist. LFM2.5-1.2B adapters are architecturally incompatible
28+
// with the LFM2-1.2B-Tool model in the app (different model version).
29+
// - Qwen 2.5 1.5B: ggml-org (the llama.cpp team) publishes a tested, GGUF-format "abliterated"
30+
// LoRA adapter (~374MB). This is the smallest proven pair available.
31+
//
32+
// The Qwen 2.5 1.5B base model is registered in RunAnywhereAIApp.swift (~986MB Q4_K_M GGUF).
33+
//
34+
//
35+
// CONTEXT SIZE & MEMORY (C++ CHANGE)
36+
// ----------------------------------
37+
// Qwen 2.5 1.5B has 1.5B parameters and a 128K training context. The C++ llama.cpp backend
38+
// uses adaptive context sizing based on model size:
39+
//
40+
// >= 7B params -> 2048 context (fits ~6GB GPU memory)
41+
// >= 3B params -> 4096 context
42+
// >= 1B params -> 2048 context (** we added this tier **)
43+
// < 1B params -> 8192 context (tiny models, plenty of headroom)
44+
//
45+
// Without the 1-3B tier, the 1.5B model got 8192 context -> 4,748 MB compute buffer -> OOM crash.
46+
// Even at 4096, applying the F16 LoRA adapter pushed the compute buffer to 2,399 MB -> OOM.
47+
// At 2048 context, total runtime memory is ~2.5GB (weights + KV cache + LoRA + compute), which
48+
// fits on 6GB+ iPhones (iPhone 14 and newer).
49+
//
50+
// This change lives in: sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp
51+
// (search for "Small-medium model detected")
52+
//
53+
//
54+
// LORA SCALE NOTE
55+
// ---------------
56+
// The demo adapter is F16 (full precision) applied to a Q4_K_M (4-bit quantized) base model.
57+
// At scale 1.0, this causes numerical instability -> gibberish output. Scale 0.3 is the tested
58+
// sweet spot: coherent output with observable behavior change. The UI slider still allows
59+
// adjustment (0.0 - 2.0) for experimentation.
60+
//
61+
//
62+
// PORTAL INTEGRATION CHECKLIST
63+
// ----------------------------
64+
// When the portal delivers LoRA adapters OTA, do the following:
65+
//
66+
// 1. DELETE this file (DemoLoRAAdapter.swift)
67+
// 2. DELETE the Qwen 2.5 1.5B model registration in RunAnywhereAIApp.swift
68+
// (search for "qwen2.5-1.5b-instruct-q4_k_m" and the TODO above it)
69+
// 3. In LLMViewModel.swift, REPLACE the demo adapter state & download logic
70+
// (search for "TODO: [Portal Integration]") with portal API calls
71+
// 4. In ChatInterfaceView.swift, UPDATE the "Available for This Model" section
72+
// in LoRAManagementSheetView to use portal-provided adapter data
73+
// (search for "TODO: [Portal Integration]")
74+
// 5. The SDK-level LoRA API (RunAnywhere+LoRA.swift, CppBridge+LLM.swift) stays unchanged --
75+
// it takes a local file path + scale, which is the same regardless of how the file got there
76+
//
77+
// =========================================================================================
78+
79+
import Foundation
80+
81+
// MARK: - Demo LoRA Adapter Registry
82+
83+
/// Represents a pre-registered LoRA adapter available for OTA download.
/// TODO: [Portal Integration] Replace with portal-provided adapter catalog model.
struct DemoLoRAAdapter: Identifiable, Sendable {
    // Stable identifier; also used as the key in the view model's download-state dictionaries.
    let id: String
    // Display name for the adapter picker UI.
    let name: String
    // One-line description shown alongside the name.
    let description: String
    // Remote GGUF adapter file to fetch over the air.
    let downloadURL: URL
    // File name used for the local copy inside the LoRA download directory.
    let fileName: String
    // Model ids this adapter works with — LoRA adapters are architecture-specific
    // (see the integration guide at the top of this file).
    let compatibleModelIds: Set<String>
    // Approximate size in bytes; used for display via `fileSizeFormatted`.
    let fileSize: Int64
    // Recommended application scale (0.3 for the demo F16-on-Q4 pairing; see file header).
    let defaultScale: Float

    /// `fileSize` rendered as a human-readable file size (e.g. "374 MB").
    var fileSizeFormatted: String {
        ByteCountFormatter.string(fromByteCount: fileSize, countStyle: .file)
    }
}
99+
100+
// MARK: - Demo Adapter Catalog
101+
102+
/// Hardcoded demo catalog of OTA-downloadable LoRA adapters.
/// TODO: [Portal Integration] Remove once adapters are delivered OTA from portal.
enum DemoLoRAAdapterCatalog {
    /// Single demo entry: ggml-org's abliterated F16 GGUF adapter for
    /// Qwen 2.5 1.5B Instruct (the smallest proven base/adapter pair; see file header).
    static let adapters: [DemoLoRAAdapter] = [
        DemoLoRAAdapter(
            id: "qwen2.5-1.5b-abliterated-lora",
            name: "Abliterated (Uncensored)",
            description: "Removes refusal behavior from Qwen2.5-1.5B. From ggml-org.",
            downloadURL: URL(string: "https://huggingface.co/ggml-org/LoRA-Qwen2.5-1.5B-Instruct-abliterated-F16-GGUF/resolve/main/LoRA-Qwen2.5-1.5B-Instruct-abliterated-f16.gguf")!,
            fileName: "LoRA-Qwen2.5-1.5B-Instruct-abliterated-f16.gguf",
            compatibleModelIds: ["qwen2.5-1.5b-instruct-q4_k_m"],
            fileSize: 374_000_000,
            defaultScale: 0.3
        )
    ]

    /// Returns every catalog adapter whose compatibility set includes `modelId`.
    static func adapters(forModelId modelId: String) -> [DemoLoRAAdapter] {
        adapters.filter { candidate in
            candidate.compatibleModelIds.contains(modelId)
        }
    }
}

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,19 @@ final class LLMViewModel {
3333
private(set) var modelSupportsStreaming = true
3434
private(set) var currentConversation: Conversation?
3535

36+
// MARK: - LoRA Adapter State

// Adapters currently applied to the loaded model, as reported by the SDK
// via refreshLoraAdapters().
private(set) var loraAdapters: [LoRAAdapterInfo] = []
// True while loadLoraAdapter(path:scale:) is in flight.
private(set) var isLoadingLoRA = false

// MARK: - LoRA Adapter Download State
// TODO: [Portal Integration] Remove demo adapter download state once portal delivers adapters OTA.

// Demo adapters compatible with the currently loaded model (from DemoLoRAAdapterCatalog).
private(set) var availableDemoAdapters: [DemoLoRAAdapter] = []
// Download fraction (0.0-1.0) keyed by adapter id; entry removed when the download ends.
private(set) var adapterDownloadProgress: [String: Double] = [:]
// Local file path keyed by adapter id, for adapter files already on disk.
private(set) var downloadedAdapterPaths: [String: String] = [:]
// Guards against starting a second concurrent download of the same adapter, keyed by id.
private(set) var isDownloadingAdapter: [String: Bool] = [:]
48+
3649
// MARK: - User Settings
3750

3851
var currentInput = ""
@@ -308,6 +321,142 @@ final class LLMViewModel {
308321
clearChat()
309322
}
310323

324+
// MARK: - LoRA Adapter Management

/// Applies the LoRA adapter file at `path` to the loaded model with the given `scale`.
/// On success, refreshes `loraAdapters`; on failure, publishes the error via `error`.
func loadLoraAdapter(path: String, scale: Float) async {
    isLoadingLoRA = true
    // Guarantee the busy flag is cleared on every exit path.
    defer { isLoadingLoRA = false }
    error = nil
    do {
        try await RunAnywhere.loadLoraAdapter(LoRAAdapterConfig(path: path, scale: scale))
        await refreshLoraAdapters()
        logger.info("LoRA adapter loaded: \(path) (scale=\(scale))")
    } catch {
        logger.error("Failed to load LoRA adapter: \(error)")
        self.error = error
    }
}
339+
340+
/// Detaches the LoRA adapter identified by `path` from the loaded model,
/// then refreshes `loraAdapters`. Failures are logged and published via `error`.
func removeLoraAdapter(path: String) async {
    do {
        try await RunAnywhere.removeLoraAdapter(path)
        await refreshLoraAdapters()
    } catch {
        logger.error("Failed to remove LoRA adapter: \(error)")
        self.error = error
    }
}
349+
350+
/// Removes all loaded LoRA adapters and empties the local `loraAdapters` list.
/// Failures are logged and published via `error` (the local list is left unchanged then).
func clearLoraAdapters() async {
    do {
        try await RunAnywhere.clearLoraAdapters()
        loraAdapters = []
    } catch {
        logger.error("Failed to clear LoRA adapters: \(error)")
        self.error = error
    }
}
359+
360+
/// Re-reads the loaded-adapter list from the SDK into `loraAdapters`.
/// Failures are only logged — `error` is intentionally not set, so a
/// refresh hiccup does not surface as a user-facing error.
func refreshLoraAdapters() async {
    do {
        loraAdapters = try await RunAnywhere.getLoadedLoraAdapters()
    } catch {
        logger.error("Failed to refresh LoRA adapters: \(error)")
    }
}
367+
368+
// MARK: - Demo LoRA Adapter Download
// TODO: [Portal Integration] Remove demo adapter download logic once portal delivers adapters OTA.

/// Refreshes the list of available demo adapters for the currently loaded model,
/// then re-scans the LoRA directory so already-downloaded files are recognized.
/// Clears the list when no model is loaded.
func refreshAvailableDemoAdapters() {
    guard let modelId = ModelListViewModel.shared.currentModel?.id else {
        availableDemoAdapters = []
        return
    }
    availableDemoAdapters = DemoLoRAAdapterCatalog.adapters(forModelId: modelId)
    syncDownloadedAdapterPaths()
}
380+
381+
/// Whether a local path has been recorded for this adapter. Note: this reads
/// `downloadedAdapterPaths`, which is kept in sync with the on-disk LoRA
/// directory by `syncDownloadedAdapterPaths()` — it does not touch the disk itself.
func isAdapterDownloaded(_ adapter: DemoLoRAAdapter) -> Bool {
    downloadedAdapterPaths[adapter.id] != nil
}
385+
386+
/// Returns the recorded local file path for a downloaded adapter, or nil if
/// no download has been recorded for it.
func localPath(for adapter: DemoLoRAAdapter) -> String? {
    downloadedAdapterPaths[adapter.id]
}
390+
391+
/// Downloads a demo adapter (or reuses an already-downloaded copy), then applies
/// it to the loaded model at `scale`. No-ops if a download for this adapter is
/// already in flight. Progress is published through `adapterDownloadProgress`.
func downloadAndLoadAdapter(_ adapter: DemoLoRAAdapter, scale: Float) async {
    guard isDownloadingAdapter[adapter.id] != true else { return }

    isDownloadingAdapter[adapter.id] = true
    adapterDownloadProgress[adapter.id] = 0.0
    error = nil
    // Always clear the in-flight flag and progress entry, success or failure.
    defer {
        isDownloadingAdapter[adapter.id] = false
        adapterDownloadProgress[adapter.id] = nil
    }

    do {
        let adapterPath: String
        if let cached = downloadedAdapterPaths[adapter.id] {
            // Reuse the file from a previous download.
            adapterPath = cached
        } else {
            adapterPath = try await downloadAdapter(adapter)
        }
        await loadLoraAdapter(path: adapterPath, scale: scale)
    } catch {
        logger.error("Failed to download/load adapter \(adapter.id): \(error)")
        self.error = error
    }
}
415+
416+
/// Fetches the adapter file into the LoRA directory and returns its local path,
/// recording it in `downloadedAdapterPaths`. Skips the network entirely when the
/// file is already on disk. Download progress is forwarded to
/// `adapterDownloadProgress` on the main actor.
private func downloadAdapter(_ adapter: DemoLoRAAdapter) async throws -> String {
    let directory = Self.loraDownloadDirectory()
    try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
    let destination = directory.appendingPathComponent(adapter.fileName)
    let destinationPath = destination.path

    // Already present from a previous session — record and reuse it.
    guard !FileManager.default.fileExists(atPath: destinationPath) else {
        downloadedAdapterPaths[adapter.id] = destinationPath
        return destinationPath
    }

    let progressDelegate = DownloadProgressDelegate { [weak self] fraction in
        Task { @MainActor in
            self?.adapterDownloadProgress[adapter.id] = fraction
        }
    }

    let (tempURL, _) = try await URLSession.shared.download(from: adapter.downloadURL, delegate: progressDelegate)
    // A file may have appeared at the destination while we were downloading;
    // replace it so moveItem cannot fail on an existing path.
    if FileManager.default.fileExists(atPath: destinationPath) {
        try FileManager.default.removeItem(at: destination)
    }
    try FileManager.default.moveItem(at: tempURL, to: destination)

    downloadedAdapterPaths[adapter.id] = destinationPath
    logger.info("Adapter downloaded to \(destinationPath)")
    return destinationPath
}
443+
444+
/// Seeds `downloadedAdapterPaths` by probing the LoRA directory for each
/// available demo adapter's file. Entries are only added, never removed.
private func syncDownloadedAdapterPaths() {
    let directory = Self.loraDownloadDirectory()
    for adapter in availableDemoAdapters {
        let candidate = directory.appendingPathComponent(adapter.fileName).path
        guard FileManager.default.fileExists(atPath: candidate) else { continue }
        downloadedAdapterPaths[adapter.id] = candidate
    }
}
454+
455+
/// Directory under the app's Documents folder where demo LoRA adapter files are stored.
static func loraDownloadDirectory() -> URL {
    FileManager.default
        .urls(for: .documentDirectory, in: .userDomainMask)[0]
        .appendingPathComponent("LoRA", isDirectory: true)
}
459+
311460
// MARK: - Private Methods - Message Generation
312461

313462
private func ensureModelIsLoaded() async throws {
@@ -387,6 +536,7 @@ final class LLMViewModel {
387536
self.messages.removeFirst()
388537
}
389538
self.addSystemMessage()
539+
self.refreshAvailableDemoAdapters()
390540
}
391541
} else {
392542
await self.checkModelStatus()

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModelTypes.swift

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,30 @@ struct GenerationMetricsFromSDK: Sendable {
3131
let tokensPerSecond: Double
3232
let timeToFirstTokenMs: Double?
3333
}
34+
35+
// MARK: - Download Progress Delegate
36+
37+
/// URLSession download delegate that forwards fractional progress (0.0-1.0) to a callback.
/// `@unchecked Sendable` is safe here: the only stored property is an immutable
/// `@Sendable` closure.
final class DownloadProgressDelegate: NSObject, URLSessionDownloadDelegate, @unchecked Sendable {
    private let onProgress: @Sendable (Double) -> Void

    init(onProgress: @escaping @Sendable (Double) -> Void) {
        self.onProgress = onProgress
    }

    func urlSession(
        _ session: URLSession,
        downloadTask: URLSessionDownloadTask,
        didWriteData bytesWritten: Int64,
        totalBytesWritten: Int64,
        totalBytesExpectedToWrite: Int64
    ) {
        // An unknown expected size (<= 0) yields no meaningful fraction; report nothing.
        guard totalBytesExpectedToWrite > 0 else { return }
        onProgress(Double(totalBytesWritten) / Double(totalBytesExpectedToWrite))
    }

    func urlSession(_ session: URLSession, downloadTask: URLSessionDownloadTask, didFinishDownloadingTo location: URL) {
        // Intentionally empty: the async URLSession.download(from:delegate:) call
        // is responsible for the downloaded file.
    }
}

0 commit comments

Comments (0)