Skip to content

Commit 4cf01c6

Browse files
shubhammalhotra28 and Siddhesh2377
authored and committed
Add lora ios (#407)
* ios initial changes * minimal sample needed to test lora * updating docs * addressed the comments
1 parent 13aa8e6 commit 4cf01c6

File tree

12 files changed

+952
-5
lines changed

12 files changed

+952
-5
lines changed

examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,17 @@ struct RunAnywhereAIApp: App {
192192
memoryRequirement: 600_000_000
193193
)
194194
}
195+
// Qwen 2.5 1.5B - LoRA-compatible base model (has publicly available GGUF LoRA adapters)
196+
// TODO: [Portal Integration] Remove once portal delivers model + adapter pairings
197+
if let qwen15BURL = URL(string: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf") {
198+
RunAnywhere.registerModel(
199+
id: "qwen2.5-1.5b-instruct-q4_k_m",
200+
name: "Qwen 2.5 1.5B Instruct Q4_K_M",
201+
url: qwen15BURL,
202+
framework: .llamaCpp,
203+
memoryRequirement: 2_500_000_000
204+
)
205+
}
195206
if let lfm2Q4URL = URL(string: "https://huggingface.co/LiquidAI/LFM2-350M-GGUF/resolve/main/LFM2-350M-Q4_K_M.gguf") {
196207
RunAnywhere.registerModel(
197208
id: "lfm2-350m-q4_k_m",
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
//
2+
// DemoLoRAAdapter.swift
3+
// RunAnywhereAI
4+
//
5+
// TODO: [Portal Integration] Remove this entire file once adapters are delivered OTA from portal.
6+
//
7+
// =========================================================================================
8+
// LoRA Demo Integration Guide
9+
// =========================================================================================
10+
//
11+
// WHAT THIS IS
12+
// ------------
13+
// This file provides a temporary, hardcoded LoRA adapter catalog so we can verify that the
14+
// full LoRA pipeline works end-to-end on iOS: download adapter OTA -> apply to model -> generate.
15+
// Once the RunAnywhere portal delivers adapter catalogs via its API, this file should be deleted
16+
// and replaced with the portal-provided data.
17+
//
18+
//
19+
// WHY QWEN 2.5 1.5B WAS CHOSEN
20+
// -----------------------------
21+
// LoRA adapters are architecture-specific: an adapter trained on Model A cannot be used with
22+
// Model B, even if they're the same parameter count. We needed a base model + GGUF LoRA adapter
23+
// pair that is publicly available and proven to work with llama.cpp.
24+
//
25+
// - SmolLM2 360M: No GGUF LoRA adapters exist anywhere (no one has published a fine-tune).
26+
// - Qwen 2.5 0.5B: No matching adapter (smallest ggml-org adapter is for 1.5B).
27+
// - LFM2 350M: No LoRA adapters exist. LFM2.5-1.2B adapters are architecturally incompatible
28+
// with the LFM2-1.2B-Tool model in the app (different model version).
29+
// - Qwen 2.5 1.5B: ggml-org (the llama.cpp team) publishes a tested, GGUF-format "abliterated"
30+
// LoRA adapter (~374MB). This is the smallest proven pair available.
31+
//
32+
// The Qwen 2.5 1.5B base model is registered in RunAnywhereAIApp.swift (~986MB Q4_K_M GGUF).
33+
//
34+
//
35+
// CONTEXT SIZE & MEMORY (C++ CHANGE)
36+
// ----------------------------------
37+
// Qwen 2.5 1.5B has 1.5B parameters and a 128K training context. The C++ llama.cpp backend
38+
// uses adaptive context sizing based on model size:
39+
//
40+
// >= 7B params -> 2048 context (fits ~6GB GPU memory)
41+
// >= 3B params -> 4096 context
42+
// >= 1B params -> 2048 context (** we added this tier **)
43+
// < 1B params -> 8192 context (tiny models, plenty of headroom)
44+
//
45+
// Without the 1-3B tier, the 1.5B model got 8192 context -> 4,748 MB compute buffer -> OOM crash.
46+
// Even at 4096, applying the F16 LoRA adapter pushed the compute buffer to 2,399 MB -> OOM.
47+
// At 2048 context, total runtime memory is ~2.5GB (weights + KV cache + LoRA + compute), which
48+
// fits on 6GB+ iPhones (iPhone 14 and newer).
49+
//
50+
// This change lives in: sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp
51+
// (search for "Small-medium model detected")
52+
//
53+
//
54+
// LORA SCALE NOTE
55+
// ---------------
56+
// The demo adapter is F16 (full precision) applied to a Q4_K_M (4-bit quantized) base model.
57+
// At scale 1.0, this causes numerical instability -> gibberish output. Scale 0.3 is the tested
58+
// sweet spot: coherent output with observable behavior change. The UI slider still allows
59+
// adjustment (0.0 - 2.0) for experimentation.
60+
//
61+
//
62+
// PORTAL INTEGRATION CHECKLIST
63+
// ----------------------------
64+
// When the portal delivers LoRA adapters OTA, do the following:
65+
//
66+
// 1. DELETE this file (DemoLoRAAdapter.swift)
67+
// 2. DELETE the Qwen 2.5 1.5B model registration in RunAnywhereAIApp.swift
68+
// (search for "qwen2.5-1.5b-instruct-q4_k_m" and the TODO above it)
69+
// 3. In LLMViewModel.swift, REPLACE the demo adapter state & download logic
70+
// (search for "TODO: [Portal Integration]") with portal API calls
71+
// 4. In ChatInterfaceView.swift, UPDATE the "Available for This Model" section
72+
// in LoRAManagementSheetView to use portal-provided adapter data
73+
// (search for "TODO: [Portal Integration]")
74+
// 5. The SDK-level LoRA API (RunAnywhere+LoRA.swift, CppBridge+LLM.swift) stays unchanged --
75+
// it takes a local file path + scale, which is the same regardless of how the file got there
76+
//
77+
// =========================================================================================
78+
79+
import Foundation
80+
81+
// MARK: - Demo LoRA Adapter Registry
82+
83+
/// Represents a pre-registered LoRA adapter available for OTA download.
/// TODO: [Portal Integration] Replace with portal-provided adapter catalog model.
struct DemoLoRAAdapter: Identifiable, Sendable {
    // Stable identifier; also used as the key in the view model's download-state dictionaries.
    let id: String
    // Display name for the adapter picker UI.
    let name: String
    // One-line description shown alongside the name.
    let description: String
    // Remote GGUF adapter file to fetch over the air.
    let downloadURL: URL
    // File name used for the local copy inside the LoRA download directory.
    let fileName: String
    // Model ids this adapter works with — LoRA adapters are architecture-specific
    // (see the integration guide at the top of this file).
    let compatibleModelIds: Set<String>
    // Approximate size in bytes; used for display via `fileSizeFormatted`.
    let fileSize: Int64
    // Recommended application scale (0.3 for the demo F16-on-Q4 pairing; see file header).
    let defaultScale: Float

    /// `fileSize` rendered as a human-readable file size (e.g. "374 MB").
    var fileSizeFormatted: String {
        ByteCountFormatter.string(fromByteCount: fileSize, countStyle: .file)
    }
}
99+
100+
// MARK: - Demo Adapter Catalog
101+
102+
/// Hardcoded demo catalog of OTA-downloadable LoRA adapters.
/// TODO: [Portal Integration] Remove once adapters are delivered OTA from portal.
enum DemoLoRAAdapterCatalog {
    /// Single demo entry: ggml-org's abliterated F16 GGUF adapter for
    /// Qwen 2.5 1.5B Instruct (the smallest proven base/adapter pair; see file header).
    static let adapters: [DemoLoRAAdapter] = [
        DemoLoRAAdapter(
            id: "qwen2.5-1.5b-abliterated-lora",
            name: "Abliterated (Uncensored)",
            description: "Removes refusal behavior from Qwen2.5-1.5B. From ggml-org.",
            downloadURL: URL(string: "https://huggingface.co/ggml-org/LoRA-Qwen2.5-1.5B-Instruct-abliterated-F16-GGUF/resolve/main/LoRA-Qwen2.5-1.5B-Instruct-abliterated-f16.gguf")!,
            fileName: "LoRA-Qwen2.5-1.5B-Instruct-abliterated-f16.gguf",
            compatibleModelIds: ["qwen2.5-1.5b-instruct-q4_k_m"],
            fileSize: 374_000_000,
            defaultScale: 0.3
        )
    ]

    /// Returns every catalog adapter whose compatibility set includes `modelId`.
    static func adapters(forModelId modelId: String) -> [DemoLoRAAdapter] {
        adapters.filter { candidate in
            candidate.compatibleModelIds.contains(modelId)
        }
    }
}

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,19 @@ final class LLMViewModel {
3333
private(set) var modelSupportsStreaming = true
3434
private(set) var currentConversation: Conversation?
3535

36+
// MARK: - LoRA Adapter State

// Adapters currently applied to the loaded model, as reported by the SDK
// via refreshLoraAdapters().
private(set) var loraAdapters: [LoRAAdapterInfo] = []
// True while loadLoraAdapter(path:scale:) is in flight.
private(set) var isLoadingLoRA = false

// MARK: - LoRA Adapter Download State
// TODO: [Portal Integration] Remove demo adapter download state once portal delivers adapters OTA.

// Demo adapters compatible with the currently loaded model (from DemoLoRAAdapterCatalog).
private(set) var availableDemoAdapters: [DemoLoRAAdapter] = []
// Download fraction (0.0-1.0) keyed by adapter id; entry removed when the download ends.
private(set) var adapterDownloadProgress: [String: Double] = [:]
// Local file path keyed by adapter id, for adapter files already on disk.
private(set) var downloadedAdapterPaths: [String: String] = [:]
// Guards against starting a second concurrent download of the same adapter, keyed by id.
private(set) var isDownloadingAdapter: [String: Bool] = [:]
48+
3649
// MARK: - User Settings
3750

3851
var currentInput = ""
@@ -308,6 +321,142 @@ final class LLMViewModel {
308321
clearChat()
309322
}
310323

324+
// MARK: - LoRA Adapter Management

/// Applies the LoRA adapter file at `path` to the loaded model with the given `scale`.
/// On success, refreshes `loraAdapters`; on failure, publishes the error via `error`.
func loadLoraAdapter(path: String, scale: Float) async {
    isLoadingLoRA = true
    // Guarantee the busy flag is cleared on every exit path.
    defer { isLoadingLoRA = false }
    error = nil
    do {
        try await RunAnywhere.loadLoraAdapter(LoRAAdapterConfig(path: path, scale: scale))
        await refreshLoraAdapters()
        logger.info("LoRA adapter loaded: \(path) (scale=\(scale))")
    } catch {
        logger.error("Failed to load LoRA adapter: \(error)")
        self.error = error
    }
}
339+
340+
/// Detaches the LoRA adapter identified by `path` from the loaded model,
/// then refreshes `loraAdapters`. Failures are logged and published via `error`.
func removeLoraAdapter(path: String) async {
    do {
        try await RunAnywhere.removeLoraAdapter(path)
        await refreshLoraAdapters()
    } catch {
        logger.error("Failed to remove LoRA adapter: \(error)")
        self.error = error
    }
}
349+
350+
/// Removes all loaded LoRA adapters and empties the local `loraAdapters` list.
/// Failures are logged and published via `error` (the local list is left unchanged then).
func clearLoraAdapters() async {
    do {
        try await RunAnywhere.clearLoraAdapters()
        loraAdapters = []
    } catch {
        logger.error("Failed to clear LoRA adapters: \(error)")
        self.error = error
    }
}
359+
360+
/// Re-reads the loaded-adapter list from the SDK into `loraAdapters`.
/// Failures are only logged — `error` is intentionally not set, so a
/// refresh hiccup does not surface as a user-facing error.
func refreshLoraAdapters() async {
    do {
        loraAdapters = try await RunAnywhere.getLoadedLoraAdapters()
    } catch {
        logger.error("Failed to refresh LoRA adapters: \(error)")
    }
}
367+
368+
// MARK: - Demo LoRA Adapter Download
// TODO: [Portal Integration] Remove demo adapter download logic once portal delivers adapters OTA.

/// Refreshes the list of available demo adapters for the currently loaded model,
/// then re-scans the LoRA directory so already-downloaded files are recognized.
/// Clears the list when no model is loaded.
func refreshAvailableDemoAdapters() {
    guard let modelId = ModelListViewModel.shared.currentModel?.id else {
        availableDemoAdapters = []
        return
    }
    availableDemoAdapters = DemoLoRAAdapterCatalog.adapters(forModelId: modelId)
    syncDownloadedAdapterPaths()
}
380+
381+
/// Whether a local path has been recorded for this adapter. Note: this reads
/// `downloadedAdapterPaths`, which is kept in sync with the on-disk LoRA
/// directory by `syncDownloadedAdapterPaths()` — it does not touch the disk itself.
func isAdapterDownloaded(_ adapter: DemoLoRAAdapter) -> Bool {
    downloadedAdapterPaths[adapter.id] != nil
}
385+
386+
/// Returns the recorded local file path for a downloaded adapter, or nil if
/// no download has been recorded for it.
func localPath(for adapter: DemoLoRAAdapter) -> String? {
    downloadedAdapterPaths[adapter.id]
}
390+
391+
/// Downloads a demo adapter (or reuses an already-downloaded copy), then applies
/// it to the loaded model at `scale`. No-ops if a download for this adapter is
/// already in flight. Progress is published through `adapterDownloadProgress`.
func downloadAndLoadAdapter(_ adapter: DemoLoRAAdapter, scale: Float) async {
    guard isDownloadingAdapter[adapter.id] != true else { return }

    isDownloadingAdapter[adapter.id] = true
    adapterDownloadProgress[adapter.id] = 0.0
    error = nil
    // Always clear the in-flight flag and progress entry, success or failure.
    defer {
        isDownloadingAdapter[adapter.id] = false
        adapterDownloadProgress[adapter.id] = nil
    }

    do {
        let adapterPath: String
        if let cached = downloadedAdapterPaths[adapter.id] {
            // Reuse the file from a previous download.
            adapterPath = cached
        } else {
            adapterPath = try await downloadAdapter(adapter)
        }
        await loadLoraAdapter(path: adapterPath, scale: scale)
    } catch {
        logger.error("Failed to download/load adapter \(adapter.id): \(error)")
        self.error = error
    }
}
415+
416+
/// Fetches the adapter file into the LoRA directory and returns its local path,
/// recording it in `downloadedAdapterPaths`. Skips the network entirely when the
/// file is already on disk. Download progress is forwarded to
/// `adapterDownloadProgress` on the main actor.
private func downloadAdapter(_ adapter: DemoLoRAAdapter) async throws -> String {
    let directory = Self.loraDownloadDirectory()
    try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
    let destination = directory.appendingPathComponent(adapter.fileName)
    let destinationPath = destination.path

    // Already present from a previous session — record and reuse it.
    guard !FileManager.default.fileExists(atPath: destinationPath) else {
        downloadedAdapterPaths[adapter.id] = destinationPath
        return destinationPath
    }

    let progressDelegate = DownloadProgressDelegate { [weak self] fraction in
        Task { @MainActor in
            self?.adapterDownloadProgress[adapter.id] = fraction
        }
    }

    let (tempURL, _) = try await URLSession.shared.download(from: adapter.downloadURL, delegate: progressDelegate)
    // A file may have appeared at the destination while we were downloading;
    // replace it so moveItem cannot fail on an existing path.
    if FileManager.default.fileExists(atPath: destinationPath) {
        try FileManager.default.removeItem(at: destination)
    }
    try FileManager.default.moveItem(at: tempURL, to: destination)

    downloadedAdapterPaths[adapter.id] = destinationPath
    logger.info("Adapter downloaded to \(destinationPath)")
    return destinationPath
}
443+
444+
/// Seeds `downloadedAdapterPaths` by probing the LoRA directory for each
/// available demo adapter's file. Entries are only added, never removed.
private func syncDownloadedAdapterPaths() {
    let directory = Self.loraDownloadDirectory()
    for adapter in availableDemoAdapters {
        let candidate = directory.appendingPathComponent(adapter.fileName).path
        guard FileManager.default.fileExists(atPath: candidate) else { continue }
        downloadedAdapterPaths[adapter.id] = candidate
    }
}
454+
455+
/// Directory under the app's Documents folder where demo LoRA adapter files are stored.
static func loraDownloadDirectory() -> URL {
    FileManager.default
        .urls(for: .documentDirectory, in: .userDomainMask)[0]
        .appendingPathComponent("LoRA", isDirectory: true)
}
459+
311460
// MARK: - Private Methods - Message Generation
312461

313462
private func ensureModelIsLoaded() async throws {
@@ -387,6 +536,7 @@ final class LLMViewModel {
387536
self.messages.removeFirst()
388537
}
389538
self.addSystemMessage()
539+
self.refreshAvailableDemoAdapters()
390540
}
391541
} else {
392542
await self.checkModelStatus()

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModelTypes.swift

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,30 @@ struct GenerationMetricsFromSDK: Sendable {
3131
let tokensPerSecond: Double
3232
let timeToFirstTokenMs: Double?
3333
}
34+
35+
// MARK: - Download Progress Delegate
36+
37+
/// URLSession download delegate that forwards fractional progress (0.0-1.0) to a callback.
/// `@unchecked Sendable` is safe here: the only stored property is an immutable
/// `@Sendable` closure.
final class DownloadProgressDelegate: NSObject, URLSessionDownloadDelegate, @unchecked Sendable {
    private let onProgress: @Sendable (Double) -> Void

    init(onProgress: @escaping @Sendable (Double) -> Void) {
        self.onProgress = onProgress
    }

    func urlSession(
        _ session: URLSession,
        downloadTask: URLSessionDownloadTask,
        didWriteData bytesWritten: Int64,
        totalBytesWritten: Int64,
        totalBytesExpectedToWrite: Int64
    ) {
        // An unknown expected size (<= 0) yields no meaningful fraction; report nothing.
        guard totalBytesExpectedToWrite > 0 else { return }
        onProgress(Double(totalBytesWritten) / Double(totalBytesExpectedToWrite))
    }

    func urlSession(_ session: URLSession, downloadTask: URLSessionDownloadTask, didFinishDownloadingTo location: URL) {
        // Intentionally empty: the async URLSession.download(from:delegate:) call
        // is responsible for the downloaded file.
    }
}

0 commit comments

Comments (0)