From 20c138bc520f0e526dbb22083bf96f302b52c07f Mon Sep 17 00:00:00 2001 From: VyasGuru <71374747+VyasGuru@users.noreply.github.com> Date: Sat, 7 Feb 2026 16:27:46 +0530 Subject: [PATCH 1/6] Fix parametre piping through sdk --- .../Chat/ViewModels/LLMViewModel.swift | 12 ++- .../Settings/CombinedSettingsView.swift | 25 +++++++ .../Features/Settings/SettingsViewModel.swift | 23 +++++- .../backends/llamacpp/llamacpp_backend.cpp | 74 ++++++++++--------- .../src/backends/llamacpp/llamacpp_backend.h | 5 -- .../backends/llamacpp/rac_llm_llamacpp.cpp | 26 +++++++ .../LLM/RunAnywhere+StructuredOutput.swift | 17 ++++- .../LLM/RunAnywhere+TextGeneration.swift | 60 +++++++++++---- 8 files changed, 181 insertions(+), 61 deletions(-) diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift index 3c234ac06..b131e9336 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift @@ -310,15 +310,21 @@ final class LLMViewModel { private func getGenerationOptions() -> LLMGenerationOptions { let savedTemperature = UserDefaults.standard.double(forKey: "defaultTemperature") let savedMaxTokens = UserDefaults.standard.integer(forKey: "defaultMaxTokens") + let savedSystemPrompt = UserDefaults.standard.string(forKey: "defaultSystemPrompt") let effectiveSettings = ( temperature: savedTemperature != 0 ? savedTemperature : Self.defaultTemperatureValue, maxTokens: savedMaxTokens != 0 ? savedMaxTokens : Self.defaultMaxTokensValue ) + let effectiveSystemPrompt = (savedSystemPrompt?.isEmpty == false) ? savedSystemPrompt : nil + + logger.info("[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), systemPrompt=\(effectiveSystemPrompt ?? 
"nil")") + return LLMGenerationOptions( maxTokens: effectiveSettings.maxTokens, - temperature: Float(effectiveSettings.temperature) + temperature: Float(effectiveSettings.temperature), + systemPrompt: effectiveSystemPrompt ) } @@ -336,10 +342,12 @@ final class LLMViewModel { let savedMaxTokens = UserDefaults.standard.integer(forKey: "defaultMaxTokens") let maxTokens = savedMaxTokens != 0 ? savedMaxTokens : Self.defaultMaxTokensValue + let savedSystemPrompt = UserDefaults.standard.string(forKey: "defaultSystemPrompt") + UserDefaults.standard.set(temperature, forKey: "defaultTemperature") UserDefaults.standard.set(maxTokens, forKey: "defaultMaxTokens") - logger.info("Settings applied - Temperature: \(temperature), MaxTokens: \(maxTokens)") + logger.info("Settings applied - Temperature: \(temperature), MaxTokens: \(maxTokens), SystemPrompt: \(savedSystemPrompt ?? "nil")") } @objc diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/CombinedSettingsView.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/CombinedSettingsView.swift index 26c0f17f9..bfbf6393c 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/CombinedSettingsView.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/CombinedSettingsView.swift @@ -71,6 +71,17 @@ private struct IOSSettingsContent: View { ) } + // System Prompt + Section { + TextField("Enter system prompt...", text: $viewModel.systemPrompt, axis: .vertical) + .lineLimit(3...8) + } header: { + Text("System Prompt") + } footer: { + Text("Optional instructions that define AI behavior and response style.") + .font(AppTypography.caption) + } + // API Configuration (for testing custom backend) Section { Button( @@ -291,6 +302,20 @@ private struct GenerationSettingsCard: View { ) .frame(maxWidth: 200) } + + VStack(alignment: .leading, spacing: AppSpacing.smallMedium) { + HStack(alignment: .top) { + Text("System Prompt") + .frame(width: 150, alignment: .leading) + 
TextField("Enter system prompt...", text: $viewModel.systemPrompt, axis: .vertical) + .lineLimit(3...8) + .textFieldStyle(.plain) + .padding(AppSpacing.small) + .background(AppColors.backgroundTertiary) + .cornerRadius(AppSpacing.cornerRadiusRegular) + .frame(maxWidth: 400) + } + } } } } diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/SettingsViewModel.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/SettingsViewModel.swift index 4495753bd..bf14368c9 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/SettingsViewModel.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/SettingsViewModel.swift @@ -18,6 +18,7 @@ class SettingsViewModel: ObservableObject { // Generation Settings @Published var temperature: Double = 0.7 @Published var maxTokens: Int = 10000 + @Published var systemPrompt: String = "" // API Configuration @Published var apiKey: String = "" @@ -48,6 +49,7 @@ class SettingsViewModel: ObservableObject { private let baseURLStorageKey = "runanywhere_base_url" private let temperatureDefaultsKey = "defaultTemperature" private let maxTokensDefaultsKey = "defaultMaxTokens" + private let systemPromptDefaultsKey = "defaultSystemPrompt" private let analyticsLogKey = "analyticsLogToLocal" private let deviceRegisteredKey = "com.runanywhere.sdk.deviceRegistered" @@ -113,6 +115,15 @@ class SettingsViewModel: ObservableObject { } .store(in: &cancellables) + // Auto-save system prompt changes + $systemPrompt + .debounce(for: 0.5, scheduler: DispatchQueue.main) + .dropFirst() // Skip initial value to avoid saving on init + .sink { [weak self] newValue in + self?.saveSystemPrompt(newValue) + } + .store(in: &cancellables) + // Auto-save analytics logging preference $analyticsLogToLocal .dropFirst() // Skip initial value to avoid saving on init @@ -139,6 +150,9 @@ class SettingsViewModel: ObservableObject { // Load max tokens let savedMaxTokens = UserDefaults.standard.integer(forKey: 
maxTokensDefaultsKey) maxTokens = savedMaxTokens > 0 ? savedMaxTokens : 10000 + + // Load system prompt + systemPrompt = UserDefaults.standard.string(forKey: systemPromptDefaultsKey) ?? "" } private func loadApiKeyConfiguration() { @@ -181,11 +195,17 @@ class SettingsViewModel: ObservableObject { print("Settings: Saved max tokens: \(value)") } + private func saveSystemPrompt(_ value: String) { + UserDefaults.standard.set(value, forKey: systemPromptDefaultsKey) + print("Settings: Saved system prompt (\(value.count) chars)") + } + /// Get current generation configuration for SDK usage func getGenerationConfiguration() -> GenerationConfiguration { GenerationConfiguration( temperature: temperature, - maxTokens: maxTokens + maxTokens: maxTokens, + systemPrompt: systemPrompt.isEmpty ? nil : systemPrompt ) } @@ -397,4 +417,5 @@ class SettingsViewModel: ObservableObject { struct GenerationConfiguration { let temperature: Double let maxTokens: Int + let systemPrompt: String? } diff --git a/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp b/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp index e645006d0..32709ff71 100644 --- a/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp +++ b/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp @@ -190,18 +190,6 @@ bool LlamaCppTextGeneration::load_model(const std::string& model_path, if (config.contains("max_context_size")) { max_default_context_ = config["max_context_size"].get(); } - if (config.contains("temperature")) { - temperature_ = config["temperature"].get(); - } - if (config.contains("min_p")) { - min_p_ = config["min_p"].get(); - } - if (config.contains("top_p")) { - top_p_ = config["top_p"].get(); - } - if (config.contains("top_k")) { - top_k_ = config["top_k"].get(); - } model_config_ = config; model_path_ = model_path; @@ -243,29 +231,15 @@ bool LlamaCppTextGeneration::load_model(const std::string& model_path, return false; } + // Note: Sampler chain is 
rebuilt per-request in generate_stream() using request parameters + // This initial sampler is not used for actual generation auto sparams = llama_sampler_chain_default_params(); sparams.no_perf = true; sampler_ = llama_sampler_chain_init(sparams); - - if (temperature_ > 0.0f) { - llama_sampler_chain_add(sampler_, llama_sampler_init_penalties(64, 1.2f, 0.0f, 0.0f)); - - if (top_k_ > 0) { - llama_sampler_chain_add(sampler_, llama_sampler_init_top_k(top_k_)); - } - - llama_sampler_chain_add(sampler_, llama_sampler_init_top_p(top_p_, 1)); - llama_sampler_chain_add(sampler_, llama_sampler_init_temp(temperature_)); - llama_sampler_chain_add(sampler_, llama_sampler_init_dist(LLAMA_DEFAULT_SEED)); - } else { - llama_sampler_chain_add(sampler_, llama_sampler_init_greedy()); - } - - LOGI("Sampler chain: penalties(64,1.2) -> top_k(%d) -> top_p(%.2f) -> temp(%.2f) -> dist", - top_k_, top_p_, temperature_); + llama_sampler_chain_add(sampler_, llama_sampler_init_greedy()); model_loaded_ = true; - LOGI("Model loaded successfully: context_size=%d, temp=%.2f", context_size_, temperature_); + LOGI("Model loaded successfully: context_size=%d", context_size_); return true; } @@ -474,7 +448,41 @@ bool LlamaCppTextGeneration::generate_stream(const TextGenerationRequest& reques return false; } - llama_sampler_reset(sampler_); + // Configure sampler with request parameters + if (sampler_) { + llama_sampler_free(sampler_); + } + + auto sparams = llama_sampler_chain_default_params(); + sparams.no_perf = true; + sampler_ = llama_sampler_chain_init(sparams); + + if (request.temperature > 0.0f) { + // Use default penalties (1.2f repetition) or request params if added later + llama_sampler_chain_add(sampler_, + llama_sampler_init_penalties(64, request.repetition_penalty, 0.0f, 0.0f)); + + if (request.top_k > 0) { + llama_sampler_chain_add(sampler_, llama_sampler_init_top_k(request.top_k)); + } + + llama_sampler_chain_add(sampler_, llama_sampler_init_top_p(request.top_p, 1)); + 
llama_sampler_chain_add(sampler_, llama_sampler_init_temp(request.temperature)); + llama_sampler_chain_add(sampler_, llama_sampler_init_dist(LLAMA_DEFAULT_SEED)); + } else { + llama_sampler_chain_add(sampler_, llama_sampler_init_greedy()); + } + + // Log generation parameters + LOGI("[PARAMS] LLM generate_stream (per-request options): temperature=%.4f, top_p=%.4f, top_k=%d, " + "max_tokens=%d (effective=%d), repetition_penalty=%.4f, " + "system_prompt_len=%zu", + request.temperature, request.top_p, request.top_k, + request.max_tokens, effective_max_tokens, request.repetition_penalty, + request.system_prompt.length()); + + // No need to reset as we just created it + // llama_sampler_reset(sampler_); const auto vocab = llama_model_get_vocab(model_); @@ -599,10 +607,6 @@ nlohmann::json LlamaCppTextGeneration::get_model_info() const { info["context_size"] = context_size_; info["model_training_context"] = llama_model_n_ctx_train(model_); info["max_default_context"] = max_default_context_; - info["temperature"] = temperature_; - info["top_k"] = top_k_; - info["top_p"] = top_p_; - info["min_p"] = min_p_; char buf[256]; if (llama_model_meta_val_str(model_, "general.name", buf, sizeof(buf)) > 0) { diff --git a/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.h b/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.h index 2d8deb065..01a54c535 100644 --- a/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.h +++ b/sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.h @@ -140,11 +140,6 @@ class LlamaCppTextGeneration { int context_size_ = 0; int max_default_context_ = 8192; - float temperature_ = 0.8f; - float top_p_ = 0.95f; - float min_p_ = 0.05f; - int top_k_ = 40; - mutable std::mutex mutex_; }; diff --git a/sdk/runanywhere-commons/src/backends/llamacpp/rac_llm_llamacpp.cpp b/sdk/runanywhere-commons/src/backends/llamacpp/rac_llm_llamacpp.cpp index cde9dc275..09ade4967 100644 --- 
a/sdk/runanywhere-commons/src/backends/llamacpp/rac_llm_llamacpp.cpp +++ b/sdk/runanywhere-commons/src/backends/llamacpp/rac_llm_llamacpp.cpp @@ -16,8 +16,12 @@ #include "llamacpp_backend.h" #include "rac/core/rac_error.h" +#include "rac/core/rac_logger.h" #include "rac/infrastructure/events/rac_events.h" +// Use the RAC logging system +#define LOGI(...) RAC_LOG_INFO("LLM.LlamaCpp.C-API", __VA_ARGS__) + // ============================================================================= // INTERNAL HANDLE STRUCTURE // ============================================================================= @@ -152,6 +156,9 @@ rac_result_t rac_llm_llamacpp_generate(rac_handle_t handle, const char* prompt, request.max_tokens = options->max_tokens; request.temperature = options->temperature; request.top_p = options->top_p; + if (options->system_prompt != nullptr) { + request.system_prompt = options->system_prompt; + } // Handle stop sequences if available if (options->stop_sequences != nullptr && options->num_stop_sequences > 0) { for (int32_t i = 0; i < options->num_stop_sequences; i++) { @@ -160,6 +167,14 @@ rac_result_t rac_llm_llamacpp_generate(rac_handle_t handle, const char* prompt, } } } + LOGI("[PARAMS] LLM C-API (from caller options): max_tokens=%d, temperature=%.4f, " + "top_p=%.4f, system_prompt=%s", + request.max_tokens, request.temperature, request.top_p, + request.system_prompt.empty() ? 
"(none)" : "(set)"); + } else { + LOGI("[PARAMS] LLM C-API (using struct defaults): max_tokens=%d, temperature=%.4f, " + "top_p=%.4f, system_prompt=(none)", + request.max_tokens, request.temperature, request.top_p); } // Generate using C++ class @@ -203,6 +218,9 @@ rac_result_t rac_llm_llamacpp_generate_stream(rac_handle_t handle, const char* p request.max_tokens = options->max_tokens; request.temperature = options->temperature; request.top_p = options->top_p; + if (options->system_prompt != nullptr) { + request.system_prompt = options->system_prompt; + } if (options->stop_sequences != nullptr && options->num_stop_sequences > 0) { for (int32_t i = 0; i < options->num_stop_sequences; i++) { if (options->stop_sequences[i]) { @@ -210,6 +228,14 @@ rac_result_t rac_llm_llamacpp_generate_stream(rac_handle_t handle, const char* p } } } + LOGI("[PARAMS] LLM C-API (from caller options): max_tokens=%d, temperature=%.4f, " + "top_p=%.4f, system_prompt=%s", + request.max_tokens, request.temperature, request.top_p, + request.system_prompt.empty() ? 
"(none)" : "(set)"); + } else { + LOGI("[PARAMS] LLM C-API (using struct defaults): max_tokens=%d, temperature=%.4f, " + "top_p=%.4f, system_prompt=(none)", + request.max_tokens, request.temperature, request.top_p); } // Stream using C++ class diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+StructuredOutput.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+StructuredOutput.swift index 2656ae702..4f3a27d9f 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+StructuredOutput.swift +++ b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+StructuredOutput.swift @@ -249,10 +249,21 @@ public extension RunAnywhere { cOptions.top_p = options.topP cOptions.streaming_enabled = RAC_FALSE - // Generate + // Generate - wrap in system_prompt lifetime scope var llmResult = rac_llm_result_t() - let generateResult = prompt.withCString { promptPtr in - rac_llm_component_generate(handle, promptPtr, &cOptions, &llmResult) + let generateResult: rac_result_t + if let systemPrompt = options.systemPrompt { + generateResult = systemPrompt.withCString { sysPromptPtr in + cOptions.system_prompt = sysPromptPtr + return prompt.withCString { promptPtr in + rac_llm_component_generate(handle, promptPtr, &cOptions, &llmResult) + } + } + } else { + cOptions.system_prompt = nil + generateResult = prompt.withCString { promptPtr in + rac_llm_component_generate(handle, promptPtr, &cOptions, &llmResult) + } } guard generateResult == RAC_SUCCESS else { diff --git a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+TextGeneration.swift b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+TextGeneration.swift index 5353259ee..780b81a17 100644 --- a/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+TextGeneration.swift +++ 
b/sdk/runanywhere-swift/Sources/RunAnywhere/Public/Extensions/LLM/RunAnywhere+TextGeneration.swift @@ -58,10 +58,23 @@ public extension RunAnywhere { cOptions.top_p = opts.topP cOptions.streaming_enabled = RAC_FALSE - // Generate (C++ emits events) + SDKLogger.llm.info("[PARAMS] generate: temperature=\(cOptions.temperature), top_p=\(cOptions.top_p), max_tokens=\(cOptions.max_tokens), system_prompt=\(opts.systemPrompt != nil ? "set(\(opts.systemPrompt!.count) chars)" : "nil"), streaming=\(cOptions.streaming_enabled == RAC_TRUE)") + + // Generate (C++ emits events) - wrap in system_prompt lifetime scope var llmResult = rac_llm_result_t() - let generateResult = prompt.withCString { promptPtr in - rac_llm_component_generate(handle, promptPtr, &cOptions, &llmResult) + let generateResult: rac_result_t + if let systemPrompt = opts.systemPrompt { + generateResult = systemPrompt.withCString { sysPromptPtr in + cOptions.system_prompt = sysPromptPtr + return prompt.withCString { promptPtr in + rac_llm_component_generate(handle, promptPtr, &cOptions, &llmResult) + } + } + } else { + cOptions.system_prompt = nil + generateResult = prompt.withCString { promptPtr in + rac_llm_component_generate(handle, promptPtr, &cOptions, &llmResult) + } } guard generateResult == RAC_SUCCESS else { @@ -148,11 +161,14 @@ public extension RunAnywhere { cOptions.top_p = opts.topP cOptions.streaming_enabled = RAC_TRUE + SDKLogger.llm.info("[PARAMS] generateStream: temperature=\(cOptions.temperature), top_p=\(cOptions.top_p), max_tokens=\(cOptions.max_tokens), system_prompt=\(opts.systemPrompt != nil ? 
"set(\(opts.systemPrompt!.count) chars)" : "nil"), streaming=\(cOptions.streaming_enabled == RAC_TRUE)") + let stream = createTokenStream( prompt: prompt, handle: handle, options: cOptions, - collector: collector + collector: collector, + systemPrompt: opts.systemPrompt ) let resultTask = Task { @@ -168,7 +184,8 @@ public extension RunAnywhere { prompt: String, handle: UnsafeMutableRawPointer, options: rac_llm_options_t, - collector: LLMStreamingMetricsCollector + collector: LLMStreamingMetricsCollector, + systemPrompt: String? = nil ) -> AsyncThrowingStream { AsyncThrowingStream { continuation in Task { @@ -181,16 +198,29 @@ public extension RunAnywhere { let callbacks = LLMStreamCallbacks.create() var cOptions = options - let streamResult = prompt.withCString { promptPtr in - rac_llm_component_generate_stream( - handle, - promptPtr, - &cOptions, - callbacks.token, - callbacks.complete, - callbacks.error, - contextPtr - ) + let callCFunction: () -> rac_result_t = { + prompt.withCString { promptPtr in + rac_llm_component_generate_stream( + handle, + promptPtr, + &cOptions, + callbacks.token, + callbacks.complete, + callbacks.error, + contextPtr + ) + } + } + + let streamResult: rac_result_t + if let systemPrompt = systemPrompt { + streamResult = systemPrompt.withCString { sysPtr in + cOptions.system_prompt = sysPtr + return callCFunction() + } + } else { + cOptions.system_prompt = nil + streamResult = callCFunction() } if streamResult != RAC_SUCCESS { From e78ac6fd470b03471e327790368fb69b5b4039eb Mon Sep 17 00:00:00 2001 From: VyasGuru <71374747+VyasGuru@users.noreply.github.com> Date: Thu, 12 Feb 2026 00:08:53 +0530 Subject: [PATCH 2/6] Kotlin sdk updated, along with example app and cpp jni --- .../presentation/chat/ChatViewModel.kt | 26 +- .../presentation/settings/SettingsScreen.kt | 89 ++++ .../settings/SettingsViewModel.kt | 108 +++++ .../presentation/tts/TextToSpeechScreen.kt | 4 +- .../src/jni/CMakeLists.txt | 17 + .../src/jni/runanywhere_commons_jni.cpp | 
454 +++++++----------- .../sdk/public/extensions/LLM/LLMTypes.kt | 4 +- .../bridge/extensions/CppBridgeLLM.kt | 10 +- .../RunAnywhere+TextGeneration.jvmAndroid.kt | 9 + 9 files changed, 436 insertions(+), 285 deletions(-) diff --git a/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/chat/ChatViewModel.kt b/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/chat/ChatViewModel.kt index 83ff83a0b..37d6f6c7b 100644 --- a/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/chat/ChatViewModel.kt +++ b/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/chat/ChatViewModel.kt @@ -67,6 +67,10 @@ class ChatViewModel(application: Application) : AndroidViewModel(application) { private var generationJob: Job? = null + private val generationPrefs by lazy { + getApplication().getSharedPreferences("generation_settings", android.content.Context.MODE_PRIVATE) + } + init { // Always start with a new conversation for a fresh chat experience val conversation = conversationStore.createConversation() @@ -237,7 +241,7 @@ class ChatViewModel(application: Application) : AndroidViewModel(application) { try { // Use SDK streaming generation - returns Flow - RunAnywhere.generateStream(prompt).collect { token -> + RunAnywhere.generateStream(prompt, getGenerationOptions()).collect { token -> fullResponse += token totalTokensReceived++ @@ -361,7 +365,7 @@ class ChatViewModel(application: Application) : AndroidViewModel(application) { try { // RunAnywhere.generate() returns LLMGenerationResult - val result = RunAnywhere.generate(prompt) + val result = RunAnywhere.generate(prompt, getGenerationOptions()) val response = result.text val endTime = System.currentTimeMillis() @@ -724,6 +728,24 @@ class ChatViewModel(application: Application) : AndroidViewModel(application) { _uiState.value = _uiState.value.copy(error = null) } + /** + * Get 
generation options from SharedPreferences + */ + private fun getGenerationOptions(): com.runanywhere.sdk.public.extensions.LLM.LLMGenerationOptions { + val temperature = generationPrefs.getFloat("defaultTemperature", 0.7f) + val maxTokens = generationPrefs.getInt("defaultMaxTokens", 1000) + val systemPromptValue = generationPrefs.getString("defaultSystemPrompt", "") + val systemPrompt = if (systemPromptValue.isNullOrEmpty()) null else systemPromptValue + + Log.i(TAG, "[PARAMS] App getGenerationOptions: temperature=$temperature, maxTokens=$maxTokens, systemPrompt=${systemPrompt ?: "nil"}") + + return com.runanywhere.sdk.public.extensions.LLM.LLMGenerationOptions( + maxTokens = maxTokens, + temperature = temperature, + systemPrompt = systemPrompt + ) + } + companion object { private const val TAG = "ChatViewModel" } diff --git a/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/settings/SettingsScreen.kt b/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/settings/SettingsScreen.kt index 03c5bf793..5e7502632 100644 --- a/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/settings/SettingsScreen.kt +++ b/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/settings/SettingsScreen.kt @@ -114,6 +114,95 @@ fun SettingsScreen(viewModel: SettingsViewModel = viewModel()) { ) } + // Generation Settings Section + SettingsSection(title = "Generation Settings") { + // Temperature Slider + Column(modifier = Modifier.fillMaxWidth()) { + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically, + ) { + Text( + text = "Temperature", + style = MaterialTheme.typography.bodyLarge, + ) + Text( + text = String.format("%.1f", uiState.temperature), + style = MaterialTheme.typography.bodyMedium, + color = 
MaterialTheme.colorScheme.onSurfaceVariant, + ) + } + Slider( + value = uiState.temperature, + onValueChange = { viewModel.updateTemperature(it) }, + valueRange = 0f..2f, + steps = 19, // 0.1 increments from 0.0 to 2.0 + modifier = Modifier.fillMaxWidth(), + ) + } + + HorizontalDivider(modifier = Modifier.padding(vertical = 4.dp)) + + // Max Tokens Slider + Column(modifier = Modifier.fillMaxWidth()) { + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically, + ) { + Text( + text = "Max Tokens", + style = MaterialTheme.typography.bodyLarge, + ) + Text( + text = uiState.maxTokens.toString(), + style = MaterialTheme.typography.bodyMedium, + color = MaterialTheme.colorScheme.onSurfaceVariant, + ) + } + Slider( + value = uiState.maxTokens.toFloat(), + onValueChange = { viewModel.updateMaxTokens(it.toInt()) }, + valueRange = 50f..4096f, + steps = 80, // 50-token increments + modifier = Modifier.fillMaxWidth(), + ) + } + + HorizontalDivider(modifier = Modifier.padding(vertical = 4.dp)) + + // System Prompt TextField + OutlinedTextField( + value = uiState.systemPrompt, + onValueChange = { viewModel.updateSystemPrompt(it) }, + label = { Text("System Prompt") }, + placeholder = { Text("Enter system prompt (optional)") }, + modifier = Modifier.fillMaxWidth(), + maxLines = 3, + textStyle = MaterialTheme.typography.bodyMedium, + ) + + Spacer(modifier = Modifier.height(8.dp)) + + // Save Button + OutlinedButton( + onClick = { viewModel.saveGenerationSettings() }, + colors = ButtonDefaults.outlinedButtonColors( + contentColor = AppColors.primaryAccent, + ), + ) { + Text("Save Settings") + } + + Spacer(modifier = Modifier.height(8.dp)) + Text( + text = "These settings affect LLM text generation.", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + ) + } + // Storage Overview Section SettingsSection( title = "Storage Overview", diff --git 
a/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/settings/SettingsViewModel.kt b/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/settings/SettingsViewModel.kt index 3c84bb76d..734c94108 100644 --- a/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/settings/SettingsViewModel.kt +++ b/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/settings/SettingsViewModel.kt @@ -47,6 +47,10 @@ data class SettingsUiState( val isBaseURLConfigured: Boolean = false, val showApiConfigSheet: Boolean = false, val showRestartDialog: Boolean = false, + // Generation Settings + val temperature: Float = 0.7f, + val maxTokens: Int = 1000, + val systemPrompt: String = "", // Loading states val isLoading: Boolean = false, val errorMessage: String? = null, @@ -77,6 +81,10 @@ class SettingsViewModel(application: Application) : AndroidViewModel(application ) } + private val generationPrefs by lazy { + application.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) + } + companion object { private const val TAG = "SettingsViewModel" private const val ENCRYPTED_PREFS_FILE = "runanywhere_secure_prefs" @@ -84,6 +92,12 @@ class SettingsViewModel(application: Application) : AndroidViewModel(application private const val KEY_BASE_URL = "runanywhere_base_url" private const val KEY_DEVICE_REGISTERED = "com.runanywhere.sdk.deviceRegistered" + // Generation settings constants (match iOS key names) + private const val PREFS_NAME = "generation_settings" + private const val KEY_TEMPERATURE = "defaultTemperature" + private const val KEY_MAX_TOKENS = "defaultMaxTokens" + private const val KEY_SYSTEM_PROMPT = "defaultSystemPrompt" + /** * Get stored API key (for use at app launch) */ @@ -145,10 +159,36 @@ class SettingsViewModel(application: Application) : AndroidViewModel(application fun hasCustomConfiguration(context: Context): Boolean 
{ return getStoredApiKey(context) != null && getStoredBaseURL(context) != null } + + /** + * Data class for generation settings + */ + data class GenerationSettings( + val temperature: Float, + val maxTokens: Int, + val systemPrompt: String? + ) + + /** + * Get generation settings (for use by ChatViewModel) + */ + fun getGenerationSettings(context: Context): GenerationSettings { + val prefs = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) + val temperature = prefs.getFloat(KEY_TEMPERATURE, 0.7f) + val maxTokens = prefs.getInt(KEY_MAX_TOKENS, 1000) + val systemPrompt = prefs.getString(KEY_SYSTEM_PROMPT, "") + + return GenerationSettings( + temperature = temperature, + maxTokens = maxTokens, + systemPrompt = if (systemPrompt.isNullOrEmpty()) null else systemPrompt + ) + } } init { loadApiConfiguration() + loadGenerationSettings() loadStorageData() subscribeToModelEvents() } @@ -302,6 +342,74 @@ class SettingsViewModel(application: Application) : AndroidViewModel(application } } + // ========== Generation Settings Management ========== + + /** + * Load generation settings from SharedPreferences + */ + private fun loadGenerationSettings() { + try { + val temperature = generationPrefs.getFloat(KEY_TEMPERATURE, 0.7f) + val maxTokens = generationPrefs.getInt(KEY_MAX_TOKENS, 1000) + val systemPrompt = generationPrefs.getString(KEY_SYSTEM_PROMPT, "") ?: "" + + _uiState.update { + it.copy( + temperature = temperature, + maxTokens = maxTokens, + systemPrompt = systemPrompt + ) + } + Log.d(TAG, "Generation settings loaded - temperature: $temperature, maxTokens: $maxTokens, systemPrompt length: ${systemPrompt.length}") + } catch (e: Exception) { + Log.e(TAG, "Failed to load generation settings", e) + } + } + + /** + * Update temperature in UI state + */ + fun updateTemperature(value: Float) { + _uiState.update { it.copy(temperature = value) } + } + + /** + * Update max tokens in UI state + */ + fun updateMaxTokens(value: Int) { + _uiState.update { 
it.copy(maxTokens = value) } + } + + /** + * Update system prompt in UI state + */ + fun updateSystemPrompt(value: String) { + _uiState.update { it.copy(systemPrompt = value) } + } + + /** + * Save generation settings to SharedPreferences + */ + fun saveGenerationSettings() { + viewModelScope.launch { + try { + val currentState = _uiState.value + generationPrefs.edit() + .putFloat(KEY_TEMPERATURE, currentState.temperature) + .putInt(KEY_MAX_TOKENS, currentState.maxTokens) + .putString(KEY_SYSTEM_PROMPT, currentState.systemPrompt) + .apply() + + Log.d(TAG, "Generation settings saved successfully - temperature: ${currentState.temperature}, maxTokens: ${currentState.maxTokens}") + } catch (e: Exception) { + Log.e(TAG, "Failed to save generation settings", e) + _uiState.update { + it.copy(errorMessage = "Failed to save generation settings: ${e.message}") + } + } + } + } + // ========== API Configuration Management ========== /** diff --git a/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/tts/TextToSpeechScreen.kt b/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/tts/TextToSpeechScreen.kt index 51ee496c1..b8207fc3d 100644 --- a/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/tts/TextToSpeechScreen.kt +++ b/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/tts/TextToSpeechScreen.kt @@ -392,7 +392,8 @@ private fun VoiceSettingsSection( ) } - // Pitch slider + // Pitch slider - Commented out for now + /* Column(verticalArrangement = Arrangement.spacedBy(8.dp)) { Row( modifier = Modifier.fillMaxWidth(), @@ -420,6 +421,7 @@ private fun VoiceSettingsSection( ), ) } + */ } } } diff --git a/sdk/runanywhere-commons/src/jni/CMakeLists.txt b/sdk/runanywhere-commons/src/jni/CMakeLists.txt index 496b7e9ab..c193d6792 100644 --- a/sdk/runanywhere-commons/src/jni/CMakeLists.txt +++ 
b/sdk/runanywhere-commons/src/jni/CMakeLists.txt @@ -22,6 +22,22 @@ project(runanywhere_commons_jni) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +include(FetchContent) + +# needed for string parsing +if(NOT DEFINED NLOHMANN_JSON_VERSION) + set(NLOHMANN_JSON_VERSION "3.11.3") +endif() + +FetchContent_Declare( + nlohmann_json + GIT_REPOSITORY https://github.com/nlohmann/json.git + GIT_TAG v${NLOHMANN_JSON_VERSION} + GIT_SHALLOW TRUE +) +set(JSON_BuildTests OFF CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(nlohmann_json) + # Find JNI find_package(JNI REQUIRED) include_directories(${JNI_INCLUDE_DIRS}) @@ -41,6 +57,7 @@ add_library(runanywhere_commons_jni SHARED ${JNI_SOURCES}) # Backend libraries are NOT linked here - they have their own JNI libraries target_link_libraries(runanywhere_commons_jni rac_commons + nlohmann_json::nlohmann_json ${JNI_LIBRARIES} ) diff --git a/sdk/runanywhere-commons/src/jni/runanywhere_commons_jni.cpp b/sdk/runanywhere-commons/src/jni/runanywhere_commons_jni.cpp index da2e337a4..38a5718f4 100644 --- a/sdk/runanywhere-commons/src/jni/runanywhere_commons_jni.cpp +++ b/sdk/runanywhere-commons/src/jni/runanywhere_commons_jni.cpp @@ -20,6 +20,7 @@ #include #include #include +#include // Include runanywhere-commons C API headers #include "rac/core/rac_analytics_events.h" @@ -531,6 +532,28 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racLlmComponentGenerate options.temperature = 0.7f; options.top_p = 1.0f; options.streaming_enabled = RAC_FALSE; + options.system_prompt = RAC_NULL; + + // Parse configJson if provided + std::string sys_prompt_storage; + if (config != nullptr) { + try { + auto j = nlohmann::json::parse(config); + options.max_tokens = j.value("max_tokens", 512); + options.temperature = j.value("temperature", 0.7f); + options.top_p = j.value("top_p", 1.0f); + sys_prompt_storage = j.value("system_prompt", std::string("")); + if (!sys_prompt_storage.empty()) { + options.system_prompt = 
sys_prompt_storage.c_str(); + } + } catch (const nlohmann::json::exception& e) { + LOGe("Failed to parse LLM config JSON: %s", e.what()); + } + } + + LOGi("racLlmComponentGenerate options: temp=%.2f, max_tokens=%d, top_p=%.2f, system_prompt=%s", + options.temperature, options.max_tokens, options.top_p, + options.system_prompt ? "(set)" : "(none)"); rac_llm_result_t result = {}; LOGi("racLlmComponentGenerate calling rac_llm_component_generate..."); @@ -550,39 +573,14 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racLlmComponentGenerate LOGi("racLlmComponentGenerate result text length=%zu", strlen(result.text)); // Build JSON result - keys must match what Kotlin expects - std::string json = "{"; - json += "\"text\":\""; - // Escape special characters in text for JSON - for (const char* p = result.text; *p; p++) { - switch (*p) { - case '"': - json += "\\\""; - break; - case '\\': - json += "\\\\"; - break; - case '\n': - json += "\\n"; - break; - case '\r': - json += "\\r"; - break; - case '\t': - json += "\\t"; - break; - default: - json += *p; - break; - } - } - json += "\","; - // Kotlin expects these keys: - json += "\"tokens_generated\":" + std::to_string(result.completion_tokens) + ","; - json += "\"tokens_evaluated\":" + std::to_string(result.prompt_tokens) + ","; - json += "\"stop_reason\":" + std::to_string(0) + ","; // 0 = normal completion - json += "\"total_time_ms\":" + std::to_string(result.total_time_ms) + ","; - json += "\"tokens_per_second\":" + std::to_string(result.tokens_per_second); - json += "}"; + nlohmann::json json_obj; + json_obj["text"] = std::string(result.text); + json_obj["tokens_generated"] = result.completion_tokens; + json_obj["tokens_evaluated"] = result.prompt_tokens; + json_obj["stop_reason"] = 0; // 0 = normal completion + json_obj["total_time_ms"] = result.total_time_ms; + json_obj["tokens_per_second"] = result.tokens_per_second; + std::string json = json_obj.dump(); LOGi("racLlmComponentGenerate returning JSON: %zu 
bytes", json.length()); @@ -798,6 +796,28 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racLlmComponentGenerate options.temperature = 0.7f; options.top_p = 1.0f; options.streaming_enabled = RAC_TRUE; + options.system_prompt = RAC_NULL; + + // Parse configJson if provided + std::string sys_prompt_storage; + if (config != nullptr) { + try { + auto j = nlohmann::json::parse(config); + options.max_tokens = j.value("max_tokens", 512); + options.temperature = j.value("temperature", 0.7f); + options.top_p = j.value("top_p", 1.0f); + sys_prompt_storage = j.value("system_prompt", std::string("")); + if (!sys_prompt_storage.empty()) { + options.system_prompt = sys_prompt_storage.c_str(); + } + } catch (const nlohmann::json::exception& e) { + LOGe("Failed to parse LLM config JSON: %s", e.what()); + } + } + + LOGi("racLlmComponentGenerateStream options: temp=%.2f, max_tokens=%d, top_p=%.2f, system_prompt=%s", + options.temperature, options.max_tokens, options.top_p, + options.system_prompt ? 
"(set)" : "(none)"); // Create streaming context LLMStreamContext ctx; @@ -828,39 +848,14 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racLlmComponentGenerate ctx.accumulated_text.length(), ctx.token_count); // Build JSON result - keys must match what Kotlin expects - std::string json = "{"; - json += "\"text\":\""; - // Escape special characters in text for JSON - for (char c : ctx.accumulated_text) { - switch (c) { - case '"': - json += "\\\""; - break; - case '\\': - json += "\\\\"; - break; - case '\n': - json += "\\n"; - break; - case '\r': - json += "\\r"; - break; - case '\t': - json += "\\t"; - break; - default: - json += c; - break; - } - } - json += "\","; - // Kotlin expects these keys: - json += "\"tokens_generated\":" + std::to_string(ctx.final_result.completion_tokens) + ","; - json += "\"tokens_evaluated\":" + std::to_string(ctx.final_result.prompt_tokens) + ","; - json += "\"stop_reason\":" + std::to_string(0) + ","; // 0 = normal completion - json += "\"total_time_ms\":" + std::to_string(ctx.final_result.total_time_ms) + ","; - json += "\"tokens_per_second\":" + std::to_string(ctx.final_result.tokens_per_second); - json += "}"; + nlohmann::json json_obj; + json_obj["text"] = ctx.accumulated_text; + json_obj["tokens_generated"] = ctx.final_result.completion_tokens; + json_obj["tokens_evaluated"] = ctx.final_result.prompt_tokens; + json_obj["stop_reason"] = 0; // 0 = normal completion + json_obj["total_time_ms"] = ctx.final_result.total_time_ms; + json_obj["tokens_per_second"] = ctx.final_result.tokens_per_second; + std::string json = json_obj.dump(); LOGi("racLlmComponentGenerateStream returning JSON: %zu bytes", json.length()); @@ -914,6 +909,28 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racLlmComponentGenerate options.temperature = 0.7f; options.top_p = 1.0f; options.streaming_enabled = RAC_TRUE; + options.system_prompt = RAC_NULL; + + // Parse configJson if provided + std::string sys_prompt_storage; + if (config != 
nullptr) { + try { + auto j = nlohmann::json::parse(config); + options.max_tokens = j.value("max_tokens", 512); + options.temperature = j.value("temperature", 0.7f); + options.top_p = j.value("top_p", 1.0f); + sys_prompt_storage = j.value("system_prompt", std::string("")); + if (!sys_prompt_storage.empty()) { + options.system_prompt = sys_prompt_storage.c_str(); + } + } catch (const nlohmann::json::exception& e) { + LOGe("Failed to parse LLM config JSON: %s", e.what()); + } + } + + LOGi("racLlmComponentGenerateStreamWithCallback options: temp=%.2f, max_tokens=%d, top_p=%.2f, system_prompt=%s", + options.temperature, options.max_tokens, options.top_p, + options.system_prompt ? "(set)" : "(none)"); // Create streaming callback context LLMStreamCallbackContext ctx; @@ -944,37 +961,14 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racLlmComponentGenerate ctx.accumulated_text.length(), ctx.token_count); // Build JSON result - std::string json = "{"; - json += "\"text\":\""; - for (char c : ctx.accumulated_text) { - switch (c) { - case '"': - json += "\\\""; - break; - case '\\': - json += "\\\\"; - break; - case '\n': - json += "\\n"; - break; - case '\r': - json += "\\r"; - break; - case '\t': - json += "\\t"; - break; - default: - json += c; - break; - } - } - json += "\","; - json += "\"tokens_generated\":" + std::to_string(ctx.final_result.completion_tokens) + ","; - json += "\"tokens_evaluated\":" + std::to_string(ctx.final_result.prompt_tokens) + ","; - json += "\"stop_reason\":" + std::to_string(0) + ","; - json += "\"total_time_ms\":" + std::to_string(ctx.final_result.total_time_ms) + ","; - json += "\"tokens_per_second\":" + std::to_string(ctx.final_result.tokens_per_second); - json += "}"; + nlohmann::json json_obj; + json_obj["text"] = ctx.accumulated_text; + json_obj["tokens_generated"] = ctx.final_result.completion_tokens; + json_obj["tokens_evaluated"] = ctx.final_result.prompt_tokens; + json_obj["stop_reason"] = 0; + json_obj["total_time_ms"] 
= ctx.final_result.total_time_ms; + json_obj["tokens_per_second"] = ctx.final_result.tokens_per_second; + std::string json = json_obj.dump(); LOGi("racLlmComponentGenerateStreamWithCallback returning JSON: %zu bytes", json.length()); @@ -1125,20 +1119,21 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racSttComponentTranscri // Parse configJson to override sample_rate if provided if (configJson != nullptr) { - const char* json = env->GetStringUTFChars(configJson, nullptr); - if (json != nullptr) { - // Simple JSON parsing for sample_rate - const char* sample_rate_key = "\"sample_rate\":"; - const char* pos = strstr(json, sample_rate_key); - if (pos != nullptr) { - pos += strlen(sample_rate_key); - int sample_rate = atoi(pos); - if (sample_rate > 0) { - options.sample_rate = sample_rate; - LOGd("Using sample_rate from config: %d", sample_rate); + const char* json_str = env->GetStringUTFChars(configJson, nullptr); + if (json_str != nullptr) { + try { + auto json = nlohmann::json::parse(json_str); + if (json.contains("sample_rate") && json["sample_rate"].is_number()) { + int sample_rate = json["sample_rate"].get(); + if (sample_rate > 0) { + options.sample_rate = sample_rate; + LOGd("Using sample_rate from config: %d", sample_rate); + } } + } catch (const nlohmann::json::exception& e) { + LOGe("Failed to parse STT config JSON: %s", e.what()); } - env->ReleaseStringUTFChars(configJson, json); + env->ReleaseStringUTFChars(configJson, json_str); } } @@ -1161,40 +1156,13 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racSttComponentTranscri } // Build JSON result - std::string json_result = "{"; - json_result += "\"text\":\""; - if (result.text != nullptr) { - // Escape special characters in text - for (const char* p = result.text; *p; ++p) { - switch (*p) { - case '"': - json_result += "\\\""; - break; - case '\\': - json_result += "\\\\"; - break; - case '\n': - json_result += "\\n"; - break; - case '\r': - json_result += "\\r"; - break; - case 
'\t': - json_result += "\\t"; - break; - default: - json_result += *p; - break; - } - } - } - json_result += "\","; - json_result += "\"language\":\"" + - std::string(result.detected_language ? result.detected_language : "en") + "\","; - json_result += "\"duration_ms\":" + std::to_string(result.processing_time_ms) + ","; - json_result += "\"completion_reason\":1,"; // END_OF_AUDIO - json_result += "\"confidence\":" + std::to_string(result.confidence); - json_result += "}"; + nlohmann::json json_obj; + json_obj["text"] = result.text ? std::string(result.text) : ""; + json_obj["language"] = result.detected_language ? std::string(result.detected_language) : "en"; + json_obj["duration_ms"] = result.processing_time_ms; + json_obj["completion_reason"] = 1; // END_OF_AUDIO + json_obj["confidence"] = result.confidence; + std::string json_result = json_obj.dump(); rac_stt_result_free(&result); @@ -1669,25 +1637,19 @@ static std::string modelInfoToJson(const rac_model_info_t* model) { if (!model) return "null"; - std::string json = "{"; - json += "\"model_id\":\"" + std::string(model->id ? model->id : "") + "\","; - json += "\"name\":\"" + std::string(model->name ? model->name : "") + "\","; - json += "\"category\":" + std::to_string(static_cast(model->category)) + ","; - json += "\"format\":" + std::to_string(static_cast(model->format)) + ","; - json += "\"framework\":" + std::to_string(static_cast(model->framework)) + ","; - json += "\"download_url\":" + - (model->download_url ? ("\"" + std::string(model->download_url) + "\"") : "null") + ","; - json += "\"local_path\":" + - (model->local_path ? ("\"" + std::string(model->local_path) + "\"") : "null") + ","; - json += "\"download_size\":" + std::to_string(model->download_size) + ","; - json += "\"context_length\":" + std::to_string(model->context_length) + ","; - json += - "\"supports_thinking\":" + std::string(model->supports_thinking ? "true" : "false") + ","; - json += "\"description\":" + - (model->description ? 
("\"" + std::string(model->description) + "\"") : "null"); - json += "}"; - - return json; + nlohmann::json j; + j["model_id"] = model->id ? model->id : ""; + j["name"] = model->name ? model->name : ""; + j["category"] = static_cast(model->category); + j["format"] = static_cast(model->format); + j["framework"] = static_cast(model->framework); + j["download_url"] = model->download_url ? nlohmann::json(model->download_url) : nlohmann::json(nullptr); + j["local_path"] = model->local_path ? nlohmann::json(model->local_path) : nlohmann::json(nullptr); + j["download_size"] = model->download_size; + j["context_length"] = model->context_length; + j["supports_thinking"] = static_cast(model->supports_thinking); + j["description"] = model->description ? nlohmann::json(model->description) : nlohmann::json(nullptr); + return j.dump(); } JNIEXPORT jint JNICALL @@ -2074,25 +2036,22 @@ Java_com_runanywhere_sdk_native_bridge_RunAnywhereBridge_racModelAssignmentFetch } // Build JSON array of models - std::string json = "["; + nlohmann::json json_array = nlohmann::json::array(); for (size_t i = 0; i < count; i++) { - if (i > 0) json += ","; - rac_model_info_t* m = models[i]; - json += "{"; - json += "\"id\":\"" + std::string(m->id ? m->id : "") + "\","; - json += "\"name\":\"" + std::string(m->name ? m->name : "") + "\","; - json += "\"category\":" + std::to_string(m->category) + ","; - json += "\"format\":" + std::to_string(m->format) + ","; - json += "\"framework\":" + std::to_string(m->framework) + ","; - json += "\"downloadUrl\":\"" + std::string(m->download_url ? m->download_url : "") + "\","; - json += "\"downloadSize\":" + std::to_string(m->download_size) + ","; - json += "\"contextLength\":" + std::to_string(m->context_length) + ","; - json += - "\"supportsThinking\":" + std::string(m->supports_thinking == RAC_TRUE ? "true" : "false"); - json += "}"; - } - json += "]"; + nlohmann::json obj; + obj["id"] = m->id ? m->id : ""; + obj["name"] = m->name ? 
m->name : ""; + obj["category"] = static_cast(m->category); + obj["format"] = static_cast(m->format); + obj["framework"] = static_cast(m->framework); + obj["downloadUrl"] = m->download_url ? m->download_url : ""; + obj["downloadSize"] = m->download_size; + obj["contextLength"] = m->context_length; + obj["supportsThinking"] = static_cast(m->supports_thinking == RAC_TRUE); + json_array.push_back(obj); + } + std::string json = json_array.dump(); // Free models array if (models) { @@ -2258,70 +2217,6 @@ static rac_result_t jni_device_http_post(const char* endpoint, const char* json_ // Protected by g_device_jni_state.mtx for thread safety static std::string g_cached_device_id; -// Helper to extract a string value from JSON (simple parser for known keys) -// Returns allocated string that must be stored persistently, or nullptr -static std::string extract_json_string(const char* json, const char* key) { - if (!json || !key) - return ""; - - std::string search_key = "\"" + std::string(key) + "\":"; - const char* pos = strstr(json, search_key.c_str()); - if (!pos) - return ""; - - pos += search_key.length(); - while (*pos == ' ') - pos++; - - if (*pos == 'n' && strncmp(pos, "null", 4) == 0) { - return ""; - } - - if (*pos != '"') - return ""; - pos++; - - const char* end = strchr(pos, '"'); - if (!end) - return ""; - - return std::string(pos, end - pos); -} - -// Helper to extract an integer value from JSON -static int64_t extract_json_int(const char* json, const char* key) { - if (!json || !key) - return 0; - - std::string search_key = "\"" + std::string(key) + "\":"; - const char* pos = strstr(json, search_key.c_str()); - if (!pos) - return 0; - - pos += search_key.length(); - while (*pos == ' ') - pos++; - - return strtoll(pos, nullptr, 10); -} - -// Helper to extract a boolean value from JSON -static bool extract_json_bool(const char* json, const char* key) { - if (!json || !key) - return false; - - std::string search_key = "\"" + std::string(key) + "\":"; - const 
char* pos = strstr(json, search_key.c_str()); - if (!pos) - return false; - - pos += search_key.length(); - while (*pos == ' ') - pos++; - - return strncmp(pos, "true", 4) == 0; -} - // Static storage for device info strings (need to persist for C callbacks) static struct { std::string device_id; @@ -2360,25 +2255,46 @@ static void jni_device_get_info(rac_device_registration_info_t* out_info, void* } if (jResult && out_info) { - const char* json = env->GetStringUTFChars(jResult, nullptr); - LOGd("jni_device_get_info: parsing JSON: %.200s...", json); + const char* json_str = env->GetStringUTFChars(jResult, nullptr); + LOGd("jni_device_get_info: parsing JSON: %.200s...", json_str); // Parse JSON and extract all fields std::lock_guard lock(g_device_info_strings.mtx); - // Extract all string fields from Kotlin's getDeviceInfoCallback() JSON - g_device_info_strings.device_id = extract_json_string(json, "device_id"); - g_device_info_strings.device_model = extract_json_string(json, "device_model"); - g_device_info_strings.device_name = extract_json_string(json, "device_name"); - g_device_info_strings.platform = extract_json_string(json, "platform"); - g_device_info_strings.os_version = extract_json_string(json, "os_version"); - g_device_info_strings.form_factor = extract_json_string(json, "form_factor"); - g_device_info_strings.architecture = extract_json_string(json, "architecture"); - g_device_info_strings.chip_name = extract_json_string(json, "chip_name"); - g_device_info_strings.gpu_family = extract_json_string(json, "gpu_family"); - g_device_info_strings.battery_state = extract_json_string(json, "battery_state"); - g_device_info_strings.device_fingerprint = extract_json_string(json, "device_fingerprint"); - g_device_info_strings.manufacturer = extract_json_string(json, "manufacturer"); + try { + auto j = nlohmann::json::parse(json_str); + + // Extract all string fields from Kotlin's getDeviceInfoCallback() JSON + g_device_info_strings.device_id = j.value("device_id", 
std::string("")); + g_device_info_strings.device_model = j.value("device_model", std::string("")); + g_device_info_strings.device_name = j.value("device_name", std::string("")); + g_device_info_strings.platform = j.value("platform", std::string("")); + g_device_info_strings.os_version = j.value("os_version", std::string("")); + g_device_info_strings.form_factor = j.value("form_factor", std::string("")); + g_device_info_strings.architecture = j.value("architecture", std::string("")); + g_device_info_strings.chip_name = j.value("chip_name", std::string("")); + g_device_info_strings.gpu_family = j.value("gpu_family", std::string("")); + g_device_info_strings.battery_state = j.value("battery_state", std::string("")); + g_device_info_strings.device_fingerprint = j.value("device_fingerprint", std::string("")); + g_device_info_strings.manufacturer = j.value("manufacturer", std::string("")); + + // Extract integer fields + out_info->total_memory = j.value("total_memory", (int64_t)0); + out_info->available_memory = j.value("available_memory", (int64_t)0); + out_info->neural_engine_cores = j.value("neural_engine_cores", (int32_t)0); + out_info->core_count = j.value("core_count", (int32_t)0); + out_info->performance_cores = j.value("performance_cores", (int32_t)0); + out_info->efficiency_cores = j.value("efficiency_cores", (int32_t)0); + + // Extract boolean fields + out_info->has_neural_engine = j.value("has_neural_engine", false) ? RAC_TRUE : RAC_FALSE; + out_info->is_low_power_mode = j.value("is_low_power_mode", false) ? RAC_TRUE : RAC_FALSE; + + // Extract float field for battery + out_info->battery_level = j.value("battery_level", 0.0f); + } catch (const nlohmann::json::exception& e) { + LOGe("Failed to parse device info JSON: %s", e.what()); + } // Assign pointers to out_info (C struct uses const char*) out_info->device_id = g_device_info_strings.device_id.empty() @@ -2415,32 +2331,12 @@ static void jni_device_get_info(rac_device_registration_info_t* out_info, void* ? 
nullptr : g_device_info_strings.device_fingerprint.c_str(); - // Extract integer fields - out_info->total_memory = extract_json_int(json, "total_memory"); - out_info->available_memory = extract_json_int(json, "available_memory"); - out_info->neural_engine_cores = - static_cast(extract_json_int(json, "neural_engine_cores")); - out_info->core_count = static_cast(extract_json_int(json, "core_count")); - out_info->performance_cores = - static_cast(extract_json_int(json, "performance_cores")); - out_info->efficiency_cores = - static_cast(extract_json_int(json, "efficiency_cores")); - - // Extract boolean fields - out_info->has_neural_engine = - extract_json_bool(json, "has_neural_engine") ? RAC_TRUE : RAC_FALSE; - out_info->is_low_power_mode = - extract_json_bool(json, "is_low_power_mode") ? RAC_TRUE : RAC_FALSE; - - // Extract float field for battery - out_info->battery_level = static_cast(extract_json_int(json, "battery_level")); - LOGi("jni_device_get_info: parsed device_model=%s, os_version=%s, architecture=%s", out_info->device_model ? out_info->device_model : "(null)", out_info->os_version ? out_info->os_version : "(null)", out_info->architecture ? 
out_info->architecture : "(null)"); - env->ReleaseStringUTFChars(jResult, json); + env->ReleaseStringUTFChars(jResult, json_str); env->DeleteLocalRef(jResult); } } diff --git a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/LLM/LLMTypes.kt b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/LLM/LLMTypes.kt index 5e8b549d1..5a28f0d03 100644 --- a/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/LLM/LLMTypes.kt +++ b/sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/LLM/LLMTypes.kt @@ -104,8 +104,8 @@ data class LLMConfiguration( */ @Serializable data class LLMGenerationOptions( - val maxTokens: Int = 100, - val temperature: Float = 0.8f, + val maxTokens: Int = 1000, + val temperature: Float = 0.7f, val topP: Float = 1.0f, val stopSequences: List = emptyList(), val streamingEnabled: Boolean = false, diff --git a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeLLM.kt b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeLLM.kt index b5585d6cf..cdff028c2 100644 --- a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeLLM.kt +++ b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/foundation/bridge/extensions/CppBridgeLLM.kt @@ -219,6 +219,7 @@ object CppBridgeLLM { * @param repeatPenalty Penalty for repeating tokens * @param stopSequences List of sequences that stop generation * @param seed Random seed for reproducibility (-1 for random) + * @param systemPrompt System prompt for LLM (optional) */ data class GenerationConfig( val maxTokens: Int = 512, @@ -228,6 +229,7 @@ object CppBridgeLLM { val repeatPenalty: Float = 1.1f, val stopSequences: List = emptyList(), val seed: Long = -1, + val systemPrompt: String? 
= null, ) { /** * Convert to JSON string for C++ interop. @@ -246,7 +248,13 @@ object CppBridgeLLM { append("\"${escapeJson(seq)}\"") } append("],") - append("\"seed\":$seed") + append("\"seed\":$seed,") + append("\"system_prompt\":") + if (systemPrompt != null) { + append("\"${escapeJson(systemPrompt)}\"") + } else { + append("null") + } append("}") } } diff --git a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+TextGeneration.jvmAndroid.kt b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+TextGeneration.jvmAndroid.kt index 1eb6d6545..c86ab3a0c 100644 --- a/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+TextGeneration.jvmAndroid.kt +++ b/sdk/runanywhere-kotlin/src/jvmAndroidMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+TextGeneration.jvmAndroid.kt @@ -50,8 +50,11 @@ actual suspend fun RunAnywhere.generate( maxTokens = opts.maxTokens, temperature = opts.temperature, topP = opts.topP, + systemPrompt = opts.systemPrompt, ) + llmLogger.info("[PARAMS] generate: temperature=${opts.temperature}, top_p=${opts.topP}, max_tokens=${opts.maxTokens}, system_prompt=${opts.systemPrompt?.let { "set(${it.length} chars)" } ?: "nil"}, streaming=false") + // Call CppBridgeLLM to generate val cppResult = CppBridgeLLM.generate(prompt, config) @@ -85,11 +88,14 @@ actual fun RunAnywhere.generateStream( val opts = options ?: LLMGenerationOptions.DEFAULT + llmLogger.info("[PARAMS] generateStream: temperature=${opts.temperature}, top_p=${opts.topP}, max_tokens=${opts.maxTokens}, system_prompt=${opts.systemPrompt?.let { "set(${it.length} chars)" } ?: "nil"}, streaming=true") + val config = CppBridgeLLM.GenerationConfig( maxTokens = opts.maxTokens, temperature = opts.temperature, topP = opts.topP, + systemPrompt = opts.systemPrompt, ) // Use a channel to bridge callback to flow @@ -132,11 +138,14 @@ actual suspend fun 
RunAnywhere.generateStreamWithMetrics( var tokenCount = 0 var firstTokenTime: Long? = null + llmLogger.info("[PARAMS] generateStreamWithMetrics: temperature=${opts.temperature}, top_p=${opts.topP}, max_tokens=${opts.maxTokens}, system_prompt=${opts.systemPrompt?.let { "set(${it.length} chars)" } ?: "nil"}, streaming=true") + val config = CppBridgeLLM.GenerationConfig( maxTokens = opts.maxTokens, temperature = opts.temperature, topP = opts.topP, + systemPrompt = opts.systemPrompt, ) // Use a channel to bridge callback to flow From 7289218227d284c46841137a2d21de8dcfe37d62 Mon Sep 17 00:00:00 2001 From: VyasGuru <71374747+VyasGuru@users.noreply.github.com> Date: Thu, 12 Feb 2026 00:12:46 +0530 Subject: [PATCH 3/6] TTS pitch ui removal For the example ios app, removed the tts pitch slider --- .../Features/Voice/TTSViewModel.swift | 2 +- .../Features/Voice/TextToSpeechView.swift | 32 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TTSViewModel.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TTSViewModel.swift index 4c9a1fe93..22368b501 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TTSViewModel.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TTSViewModel.swift @@ -26,7 +26,7 @@ class TTSViewModel: ObservableObject { // Voice Settings @Published var speechRate: Double = 1.0 - @Published var pitch: Double = 1.0 + @Published var pitch: Double = 1.0 // while removed from the UI, the backend still supports pitch, so maintaining it here. 
// MARK: - Private Properties diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TextToSpeechView.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TextToSpeechView.swift index ff178564b..4b7222ad0 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TextToSpeechView.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TextToSpeechView.swift @@ -233,21 +233,23 @@ struct TextToSpeechView: View { Slider(value: $viewModel.speechRate, in: 0.5...2.0, step: 0.1) .tint(AppColors.primaryAccent) } - - // Pitch - VStack(alignment: .leading, spacing: 10) { - HStack { - Text("Pitch") - .font(.subheadline) - .foregroundColor(.secondary) - Spacer() - Text(String(format: "%.1fx", viewModel.pitch)) - .font(.system(size: 15, weight: .medium, design: .rounded)) - .foregroundColor(.primary) - } - Slider(value: $viewModel.pitch, in: 0.5...2.0, step: 0.1) - .tint(AppColors.primaryPurple) - } + + // TODO: Find a model for TTS that supports pitch, or manually implement a good quality pitch adjustment + + // Pitch (not implemented in the current TTS models. Once supported, we can have this back.) 
+ // VStack(alignment: .leading, spacing: 10) { + // HStack { + // Text("Pitch") + // .font(.subheadline) + // .foregroundColor(.secondary) + // Spacer() + // Text(String(format: "%.1fx", viewModel.pitch)) + // .font(.system(size: 15, weight: .medium, design: .rounded)) + // .foregroundColor(.primary) + // } + // Slider(value: $viewModel.pitch, in: 0.5...2.0, step: 0.1) + // .tint(AppColors.primaryPurple) + // } } .padding(20) .background(AppColors.backgroundTertiary) From 707a2bb86c3694921150dfd8014a373050bff06e Mon Sep 17 00:00:00 2001 From: VyasGuru <71374747+VyasGuru@users.noreply.github.com> Date: Thu, 12 Feb 2026 00:18:32 +0530 Subject: [PATCH 4/6] Flutter sdk + example apps --- .../lib/core/utilities/constants.dart | 1 + .../features/chat/chat_interface_view.dart | 6 + .../models/model_selection_sheet.dart | 44 +---- .../settings/combined_settings_view.dart | 156 ++++++++++++++++++ .../features/voice/text_to_speech_view.dart | 4 +- .../lib/native/dart_bridge_llm.dart | 45 ++++- .../runanywhere/lib/public/runanywhere.dart | 2 + 7 files changed, 213 insertions(+), 45 deletions(-) diff --git a/examples/flutter/RunAnywhereAI/lib/core/utilities/constants.dart b/examples/flutter/RunAnywhereAI/lib/core/utilities/constants.dart index 5ed4a496e..a959b8ef7 100644 --- a/examples/flutter/RunAnywhereAI/lib/core/utilities/constants.dart +++ b/examples/flutter/RunAnywhereAI/lib/core/utilities/constants.dart @@ -86,5 +86,6 @@ class PreferenceKeys { static const String routingPolicy = 'routingPolicy'; static const String defaultTemperature = 'defaultTemperature'; static const String defaultMaxTokens = 'defaultMaxTokens'; + static const String defaultSystemPrompt = 'defaultSystemPrompt'; static const String useStreaming = 'useStreaming'; } diff --git a/examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart b/examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart index d92b98f32..7013465a5 100--- 
a/examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart +++ b/examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart @@ -116,11 +116,17 @@ class _ChatInterfaceViewState extends State { final temperature = prefs.getDouble(PreferenceKeys.defaultTemperature) ?? 0.7; final maxTokens = prefs.getInt(PreferenceKeys.defaultMaxTokens) ?? 500; + final systemPromptRaw = + prefs.getString(PreferenceKeys.defaultSystemPrompt) ?? ''; + final systemPrompt = systemPromptRaw.isNotEmpty ? systemPromptRaw : null; + + debugPrint('[PARAMS] App _sendMessage: temperature=$temperature, maxTokens=$maxTokens, systemPrompt=${systemPrompt != null ? "set(${systemPrompt.length} chars)" : "nil"}'); // Streaming now runs in a background isolate, so no ANR concerns final options = sdk.LLMGenerationOptions( maxTokens: maxTokens, temperature: temperature, + systemPrompt: systemPrompt, ); if (_useStreaming) { diff --git a/examples/flutter/RunAnywhereAI/lib/features/models/model_selection_sheet.dart b/examples/flutter/RunAnywhereAI/lib/features/models/model_selection_sheet.dart index 19e4eeb96..d578b3faa 100644 --- a/examples/flutter/RunAnywhereAI/lib/features/models/model_selection_sheet.dart +++ b/examples/flutter/RunAnywhereAI/lib/features/models/model_selection_sheet.dart @@ -502,46 +502,12 @@ class _ModelSelectionSheetState extends State { }); try { - setState(() { - _loadingProgress = 'Loading model into memory...'; - }); - - // Load model based on context/modality using real SDK - switch (widget.context) { - case ModelSelectionContext.llm: - debugPrint('🎯 Loading LLM model: ${model.id}'); - await sdk.RunAnywhere.loadModel(model.id); - break; - case ModelSelectionContext.stt: - debugPrint('🎯 Loading STT model: ${model.id}'); - await sdk.RunAnywhere.loadSTTModel(model.id); - break; - case ModelSelectionContext.tts: - debugPrint('🎯 Loading TTS voice: ${model.id}'); - await sdk.RunAnywhere.loadTTSVoice(model.id); - break; - case ModelSelectionContext.voice: - 
// Determine based on model category - if (model.category == ModelCategory.speechRecognition) { - debugPrint('🎯 Loading Voice STT model: ${model.id}'); - await sdk.RunAnywhere.loadSTTModel(model.id); - } else if (model.category == ModelCategory.speechSynthesis) { - debugPrint('🎯 Loading Voice TTS voice: ${model.id}'); - await sdk.RunAnywhere.loadTTSVoice(model.id); - } else { - debugPrint('🎯 Loading Voice LLM model: ${model.id}'); - await sdk.RunAnywhere.loadModel(model.id); - } - break; - } - - setState(() { - _loadingProgress = 'Model loaded successfully!'; - }); - - await Future.delayed(const Duration(milliseconds: 300)); - + // Update view model selection state await _viewModel.selectModel(model); + + // Call the callback - this is where the actual model loading happens + // The callback knows the correct context and how to load the model + debugPrint('🎯 Model selected: ${model.id}, calling callback to load'); await widget.onModelSelected(model); if (mounted) { diff --git a/examples/flutter/RunAnywhereAI/lib/features/settings/combined_settings_view.dart b/examples/flutter/RunAnywhereAI/lib/features/settings/combined_settings_view.dart index 388059db6..516d4ca17 100644 --- a/examples/flutter/RunAnywhereAI/lib/features/settings/combined_settings_view.dart +++ b/examples/flutter/RunAnywhereAI/lib/features/settings/combined_settings_view.dart @@ -8,6 +8,7 @@ import 'package:runanywhere_ai/core/design_system/typography.dart'; import 'package:runanywhere_ai/core/models/app_types.dart'; import 'package:runanywhere_ai/core/utilities/constants.dart'; import 'package:runanywhere_ai/core/utilities/keychain_helper.dart'; +import 'package:shared_preferences/shared_preferences.dart'; import 'package:url_launcher/url_launcher.dart'; /// CombinedSettingsView (mirroring iOS CombinedSettingsView.swift) @@ -37,6 +38,11 @@ class _CombinedSettingsViewState extends State { bool _isApiKeyConfigured = false; bool _isBaseURLConfigured = false; + // Generation Settings + double 
_temperature = 0.7; + int _maxTokens = 1000; + String _systemPrompt = ''; + // Loading state bool _isRefreshingStorage = false; @@ -44,6 +50,7 @@ class _CombinedSettingsViewState extends State { void initState() { super.initState(); unawaited(_loadSettings()); + unawaited(_loadGenerationSettings()); unawaited(_loadApiConfiguration()); unawaited(_loadStorageData()); } @@ -57,6 +64,32 @@ class _CombinedSettingsViewState extends State { } } + /// Load generation settings from SharedPreferences + Future _loadGenerationSettings() async { + final prefs = await SharedPreferences.getInstance(); + if (mounted) { + setState(() { + _temperature = prefs.getDouble(PreferenceKeys.defaultTemperature) ?? 0.7; + _maxTokens = prefs.getInt(PreferenceKeys.defaultMaxTokens) ?? 1000; + _systemPrompt = prefs.getString(PreferenceKeys.defaultSystemPrompt) ?? ''; + }); + } + } + + /// Save generation settings to SharedPreferences + Future _saveGenerationSettings() async { + final prefs = await SharedPreferences.getInstance(); + await prefs.setDouble(PreferenceKeys.defaultTemperature, _temperature); + await prefs.setInt(PreferenceKeys.defaultMaxTokens, _maxTokens); + await prefs.setString(PreferenceKeys.defaultSystemPrompt, _systemPrompt); + + if (mounted) { + ScaffoldMessenger.of(context).showSnackBar( + const SnackBar(content: Text('Generation settings saved')), + ); + } + } + /// Load API configuration from keychain Future _loadApiConfiguration() async { final storedApiKey = await KeychainHelper.loadString(KeychainKeys.apiKey); @@ -386,6 +419,11 @@ class _CombinedSettingsViewState extends State { _buildApiConfigurationCard(), const SizedBox(height: AppSpacing.large), + // Generation Settings Section + _buildSectionHeader('Generation Settings'), + _buildGenerationSettingsCard(), + const SizedBox(height: AppSpacing.large), + // Storage Overview Section _buildSectionHeader('Storage Overview', trailing: _buildRefreshButton()), @@ -449,6 +487,124 @@ class _CombinedSettingsViewState extends 
State { ); } + Widget _buildGenerationSettingsCard() { + return Card( + child: Padding( + padding: const EdgeInsets.all(AppSpacing.large), + child: Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + // Temperature Slider + Text('Temperature', style: AppTypography.subheadline(context)), + const SizedBox(height: AppSpacing.xSmall), + Row( + children: [ + Expanded( + child: Slider( + value: _temperature, + min: 0.0, + max: 2.0, + divisions: 20, + label: _temperature.toStringAsFixed(1), + onChanged: (value) { + setState(() { + _temperature = value; + }); + }, + ), + ), + SizedBox( + width: 40, + child: Text( + _temperature.toStringAsFixed(1), + style: AppTypography.subheadlineSemibold(context), + textAlign: TextAlign.right, + ), + ), + ], + ), + Text( + 'Controls randomness. Lower = more focused, higher = more creative.', + style: AppTypography.caption2(context).copyWith( + color: AppColors.textSecondary(context), + ), + ), + const SizedBox(height: AppSpacing.mediumLarge), + + // Max Tokens Slider + Text('Max Tokens', style: AppTypography.subheadline(context)), + const SizedBox(height: AppSpacing.xSmall), + Row( + children: [ + Expanded( + child: Slider( + value: _maxTokens.toDouble(), + min: 50, + max: 4096, + divisions: ((4096 - 50) / 50).round(), + label: _maxTokens.toString(), + onChanged: (value) { + setState(() { + _maxTokens = value.round(); + }); + }, + ), + ), + SizedBox( + width: 60, + child: Text( + _maxTokens.toString(), + style: AppTypography.subheadlineSemibold(context), + textAlign: TextAlign.right, + ), + ), + ], + ), + Text( + 'Maximum number of tokens to generate.', + style: AppTypography.caption2(context).copyWith( + color: AppColors.textSecondary(context), + ), + ), + const SizedBox(height: AppSpacing.mediumLarge), + + // System Prompt Field + Text('System Prompt', style: AppTypography.subheadline(context)), + const SizedBox(height: AppSpacing.xSmall), + TextField( + controller: TextEditingController(text: _systemPrompt) + 
..selection = TextSelection.fromPosition( + TextPosition(offset: _systemPrompt.length), + ), + maxLines: 3, + decoration: const InputDecoration( + hintText: 'Enter a system prompt...', + border: OutlineInputBorder(), + ), + onChanged: (value) { + _systemPrompt = value; + }, + ), + const SizedBox(height: AppSpacing.xSmall), + Text( + 'Instructions for how the model should behave.', + style: AppTypography.caption2(context).copyWith( + color: AppColors.textSecondary(context), + ), + ), + const SizedBox(height: AppSpacing.mediumLarge), + + // Save Settings Button + ElevatedButton( + onPressed: _saveGenerationSettings, + child: const Text('Save Settings'), + ), + ], + ), + ), + ); + } + Widget _buildApiConfigurationCard() { return Card( child: Padding( diff --git a/examples/flutter/RunAnywhereAI/lib/features/voice/text_to_speech_view.dart b/examples/flutter/RunAnywhereAI/lib/features/voice/text_to_speech_view.dart index ea6eadc20..b4d8a8c95 100644 --- a/examples/flutter/RunAnywhereAI/lib/features/voice/text_to_speech_view.dart +++ b/examples/flutter/RunAnywhereAI/lib/features/voice/text_to_speech_view.dart @@ -444,9 +444,10 @@ class _TextToSpeechViewState extends State { }); }, ), + + /* Pitch slider - Commented out for now as it is not implemented in the current TTS models. Once supported, we can have this back. const SizedBox(height: AppSpacing.mediumLarge), - // Pitch slider _buildSliderRow( label: 'Pitch', value: _pitch, @@ -459,6 +460,7 @@ class _TextToSpeechViewState extends State { }); }, ), + */ ], ), ); diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_llm.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_llm.dart index de303e452..aff43aa66 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_llm.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/native/dart_bridge_llm.dart @@ -229,6 +229,7 @@ class DartBridgeLLM { /// [prompt] - Input prompt. 
/// [maxTokens] - Maximum tokens to generate (default: 512). /// [temperature] - Sampling temperature (default: 0.7). + /// [systemPrompt] - Optional system prompt for model behavior (default: null). /// /// Returns the generated text and metrics. /// @@ -238,6 +239,7 @@ class DartBridgeLLM { String prompt, { int maxTokens = 512, double temperature = 0.7, + String? systemPrompt, }) async { final handle = getHandle(); @@ -251,8 +253,10 @@ class DartBridgeLLM { final tokens = maxTokens; final temp = temperature; + _logger.debug('[PARAMS] generate: temperature=$temperature, maxTokens=$maxTokens, systemPrompt=${systemPrompt != null ? "set(${systemPrompt.length} chars)" : "nil"}'); + final result = await Isolate.run(() { - return _generateInIsolate(handleAddress, prompt, tokens, temp); + return _generateInIsolate(handleAddress, prompt, tokens, temp, systemPrompt); }); if (result.error != null) { @@ -270,7 +274,7 @@ class DartBridgeLLM { /// Generate text with streaming. /// /// Returns a stream of tokens as they are generated. - /// + /// /// ARCHITECTURE: Runs in a background isolate to prevent ANR. /// The logger callback uses NativeCallable.listener which is thread-safe. /// Tokens are sent back to the main isolate via SendPort for UI updates. @@ -278,6 +282,7 @@ class DartBridgeLLM { String prompt, { int maxTokens = 512, // Can use higher values now since it's non-blocking double temperature = 0.7, + String? systemPrompt, }) { final handle = getHandle(); @@ -288,12 +293,15 @@ class DartBridgeLLM { // Create stream controller for emitting tokens to the caller final controller = StreamController(); + _logger.debug('[PARAMS] generateStream: temperature=$temperature, maxTokens=$maxTokens, systemPrompt=${systemPrompt != null ? 
"set(${systemPrompt.length} chars)" : "nil"}'); + // Start streaming generation in a background isolate unawaited(_startBackgroundStreaming( handle.address, prompt, maxTokens, temperature, + systemPrompt, controller, )); @@ -301,7 +309,7 @@ class DartBridgeLLM { } /// Start streaming generation in a background isolate. - /// + /// /// ARCHITECTURE NOTE: /// The logger callback now uses NativeCallable.listener which is thread-safe. /// This allows us to run the FFI streaming call in a background isolate @@ -312,6 +320,7 @@ class DartBridgeLLM { String prompt, int maxTokens, double temperature, + String? systemPrompt, StreamController controller, ) async { // Create a ReceivePort to receive tokens from the background isolate @@ -346,6 +355,7 @@ class DartBridgeLLM { prompt: prompt, maxTokens: maxTokens, temperature: temperature, + systemPrompt: systemPrompt, ), ); } catch (e) { @@ -430,6 +440,7 @@ class _StreamingIsolateParams { final String prompt; final int maxTokens; final double temperature; + final String? systemPrompt; _StreamingIsolateParams({ required this.sendPort, @@ -437,6 +448,7 @@ class _StreamingIsolateParams { required this.prompt, required this.maxTokens, required this.temperature, + this.systemPrompt, }); } @@ -460,6 +472,7 @@ void _streamingIsolateEntry(_StreamingIsolateParams params) { final handle = Pointer.fromAddress(params.handleAddress); final promptPtr = params.prompt.toNativeUtf8(); final optionsPtr = calloc(); + Pointer? 
systemPromptPtr; try { // Set options @@ -469,7 +482,14 @@ void _streamingIsolateEntry(_StreamingIsolateParams params) { optionsPtr.ref.stopSequences = nullptr; optionsPtr.ref.numStopSequences = 0; optionsPtr.ref.streamingEnabled = RAC_TRUE; - optionsPtr.ref.systemPrompt = nullptr; + + // Set systemPrompt if provided + if (params.systemPrompt != null && params.systemPrompt!.isNotEmpty) { + systemPromptPtr = params.systemPrompt!.toNativeUtf8(); + optionsPtr.ref.systemPrompt = systemPromptPtr!; + } else { + optionsPtr.ref.systemPrompt = nullptr; + } final lib = PlatformLoader.loadCommons(); @@ -532,6 +552,9 @@ void _streamingIsolateEntry(_StreamingIsolateParams params) { } finally { calloc.free(promptPtr); calloc.free(optionsPtr); + if (systemPromptPtr != null) { + calloc.free(systemPromptPtr!); + } _isolateSendPort = null; } } @@ -579,11 +602,13 @@ _IsolateGenerationResult _generateInIsolate( String prompt, int maxTokens, double temperature, + String? systemPrompt, ) { final handle = Pointer.fromAddress(handleAddress); final promptPtr = prompt.toNativeUtf8(); final optionsPtr = calloc(); final resultPtr = calloc(); + Pointer? 
systemPromptPtr; try { // Set options - matching C++ rac_llm_options_t @@ -593,7 +618,14 @@ _IsolateGenerationResult _generateInIsolate( optionsPtr.ref.stopSequences = nullptr; optionsPtr.ref.numStopSequences = 0; optionsPtr.ref.streamingEnabled = RAC_FALSE; - optionsPtr.ref.systemPrompt = nullptr; + + // Set systemPrompt if provided + if (systemPrompt != null && systemPrompt.isNotEmpty) { + systemPromptPtr = systemPrompt.toNativeUtf8(); + optionsPtr.ref.systemPrompt = systemPromptPtr!; + } else { + optionsPtr.ref.systemPrompt = nullptr; + } final lib = PlatformLoader.loadCommons(); final generateFn = lib.lookupFunction< @@ -625,5 +657,8 @@ _IsolateGenerationResult _generateInIsolate( calloc.free(promptPtr); calloc.free(optionsPtr); calloc.free(resultPtr); + if (systemPromptPtr != null) { + calloc.free(systemPromptPtr!); + } } } diff --git a/sdk/runanywhere-flutter/packages/runanywhere/lib/public/runanywhere.dart b/sdk/runanywhere-flutter/packages/runanywhere/lib/public/runanywhere.dart index 7096e22ec..ab6d24745 100644 --- a/sdk/runanywhere-flutter/packages/runanywhere/lib/public/runanywhere.dart +++ b/sdk/runanywhere-flutter/packages/runanywhere/lib/public/runanywhere.dart @@ -1192,6 +1192,7 @@ class RunAnywhere { prompt, maxTokens: opts.maxTokens, temperature: opts.temperature, + systemPrompt: opts.systemPrompt, ); final endTime = DateTime.now(); @@ -1292,6 +1293,7 @@ class RunAnywhere { prompt, maxTokens: opts.maxTokens, temperature: opts.temperature, + systemPrompt: opts.systemPrompt, ); // Forward tokens and collect them, track subscription in bridge for cancellation From fcb5139aec4a250125dd0fbbef3dd28fa14ce9c2 Mon Sep 17 00:00:00 2001 From: VyasGuru <71374747+VyasGuru@users.noreply.github.com> Date: Thu, 12 Feb 2026 00:20:51 +0530 Subject: [PATCH 5/6] React native sdk+ example apps --- .../RunAnywhereAI/src/screens/ChatScreen.tsx | 30 ++++- .../src/screens/SettingsScreen.tsx | 109 ++++++++++++++++++ .../RunAnywhereAI/src/screens/TTSScreen.tsx | 3 +- 
.../RunAnywhereAI/src/types/settings.ts | 10 ++ .../core/cpp/HybridRunAnywhereCore.cpp | 9 ++ 5 files changed, 155 insertions(+), 6 deletions(-) diff --git a/examples/react-native/RunAnywhereAI/src/screens/ChatScreen.tsx b/examples/react-native/RunAnywhereAI/src/screens/ChatScreen.tsx index 0af9c5bc9..33945e16e 100644 --- a/examples/react-native/RunAnywhereAI/src/screens/ChatScreen.tsx +++ b/examples/react-native/RunAnywhereAI/src/screens/ChatScreen.tsx @@ -31,6 +31,7 @@ import { Alert, Modal, } from 'react-native'; +import AsyncStorage from '@react-native-async-storage/async-storage'; import Icon from 'react-native-vector-icons/Ionicons'; import { Colors } from '../theme/colors'; import { Typography } from '../theme/typography'; @@ -48,9 +49,10 @@ import { ModelSelectionSheet, ModelSelectionContext, } from '../components/model'; +import { GENERATION_SETTINGS_KEYS } from '../types/settings'; // Import RunAnywhere SDK (Multi-Package Architecture) -import { RunAnywhere, type ModelInfo as SDKModelInfo } from '@runanywhere/core'; +import { RunAnywhere, type ModelInfo as SDKModelInfo, type GenerationOptions } from '@runanywhere/core'; // Generate unique ID const generateId = () => Math.random().toString(36).substring(2, 15); @@ -119,6 +121,24 @@ export const ChatScreen: React.FC = () => { // Messages from current conversation const messages = currentConversation?.messages || []; + /** + * Get generation options from AsyncStorage + * Reads user-configured temperature, maxTokens, and systemPrompt + */ + const getGenerationOptions = async (): Promise => { + const tempStr = await AsyncStorage.getItem(GENERATION_SETTINGS_KEYS.TEMPERATURE); + const maxStr = await AsyncStorage.getItem(GENERATION_SETTINGS_KEYS.MAX_TOKENS); + const sysStr = await AsyncStorage.getItem(GENERATION_SETTINGS_KEYS.SYSTEM_PROMPT); + + const temperature =tempStr !== null && !Number.isNaN(parseFloat(tempStr))? parseFloat(tempStr): 0.7; + const maxTokens = maxStr ? 
parseInt(maxStr, 10) : 1000; + const systemPrompt = sysStr && sysStr.trim() !== '' ? sysStr : undefined; + + console.log(`[PARAMS] App getGenerationOptions: temperature=${temperature}, maxTokens=${maxTokens}, systemPrompt=${systemPrompt ? `set(${systemPrompt.length} chars)` : 'nil'}`); + + return { temperature, maxTokens, systemPrompt }; + }; + /** * Load available LLM models from catalog */ @@ -267,11 +287,11 @@ export const ChatScreen: React.FC = () => { try { console.log('[ChatScreen] Starting streaming generation for:', prompt); + // Get user-configured generation options from AsyncStorage + const options = await getGenerationOptions(); + // Use streaming generation (matches Swift SDK: RunAnywhere.generateStream) - const streamingResult = await RunAnywhere.generateStream(prompt, { - maxTokens: 1000, - temperature: 0.7, - }); + const streamingResult = await RunAnywhere.generateStream(prompt, options); let fullResponse = ''; diff --git a/examples/react-native/RunAnywhereAI/src/screens/SettingsScreen.tsx b/examples/react-native/RunAnywhereAI/src/screens/SettingsScreen.tsx index be8f6a851..6e4b38f46 100644 --- a/examples/react-native/RunAnywhereAI/src/screens/SettingsScreen.tsx +++ b/examples/react-native/RunAnywhereAI/src/screens/SettingsScreen.tsx @@ -43,6 +43,7 @@ import { RoutingPolicy, RoutingPolicyDisplayNames, SETTINGS_CONSTRAINTS, + GENERATION_SETTINGS_KEYS, } from '../types/settings'; import { LLMFramework, FrameworkDisplayNames } from '../types/model'; @@ -121,6 +122,7 @@ export const SettingsScreen: React.FC = () => { ); const [temperature, setTemperature] = useState(0.7); const [maxTokens, setMaxTokens] = useState(10000); + const [systemPrompt, setSystemPrompt] = useState(''); const [apiKeyConfigured, setApiKeyConfigured] = useState(false); // API Configuration state @@ -168,6 +170,7 @@ export const SettingsScreen: React.FC = () => { useEffect(() => { loadData(); loadApiConfiguration(); + loadGenerationSettings(); }, []); /** @@ -187,6 +190,52 @@ export 
const SettingsScreen: React.FC = () => { } }; + /** + * Load generation settings from AsyncStorage + */ + const loadGenerationSettings = async () => { + try { + const tempStr = await AsyncStorage.getItem(GENERATION_SETTINGS_KEYS.TEMPERATURE); + const maxStr = await AsyncStorage.getItem(GENERATION_SETTINGS_KEYS.MAX_TOKENS); + const sysStr = await AsyncStorage.getItem(GENERATION_SETTINGS_KEYS.SYSTEM_PROMPT); + + const temperature = tempStr !== null ? parseFloat(tempStr) : 0.7; + setTemperature(temperature); + if (maxStr) setMaxTokens(parseInt(maxStr, 10)); + if (sysStr) setSystemPrompt(sysStr); + + console.log('[Settings] Loaded generation settings:', { + temperature, + maxTokens, + systemPrompt: systemPrompt ? 'set' : 'empty', + }); + } catch (error) { + console.error('[Settings] Failed to load generation settings:', error); + } + }; + + /** + * Save generation settings to AsyncStorage + */ + const saveGenerationSettings = async () => { + try { + await AsyncStorage.setItem(GENERATION_SETTINGS_KEYS.TEMPERATURE, temperature.toString()); + await AsyncStorage.setItem(GENERATION_SETTINGS_KEYS.MAX_TOKENS, maxTokens.toString()); + await AsyncStorage.setItem(GENERATION_SETTINGS_KEYS.SYSTEM_PROMPT, systemPrompt); + + console.log('[Settings] Saved generation settings:', { + temperature, + maxTokens, + systemPrompt: systemPrompt ? 
`set(${systemPrompt.length} chars)` : 'empty', + }); + + Alert.alert('Saved', 'Generation settings have been saved successfully.'); + } catch (error) { + console.error('[Settings] Failed to save generation settings:', error); + Alert.alert('Error', `Failed to save settings: ${error}`); + } + }; + /** * Normalize base URL by adding https:// if no scheme is present */ @@ -755,6 +804,30 @@ export const SettingsScreen: React.FC = () => { SETTINGS_CONSTRAINTS.maxTokens.step, (v) => v.toLocaleString() )} + + {/* System Prompt Input */} + + System Prompt + + + + {/* Save Settings Button */} + + + Save Settings + {/* API Configuration (Testing) */} @@ -1520,6 +1593,42 @@ const styles = StyleSheet.create({ modalButtonTextDisabled: { color: Colors.textTertiary, }, + // System Prompt styles + systemPromptContainer: { + padding: Padding.padding16, + borderBottomWidth: 1, + borderBottomColor: Colors.borderLight, + }, + systemPromptLabel: { + ...Typography.subheadline, + color: Colors.textPrimary, + marginBottom: Spacing.small, + }, + systemPromptInput: { + backgroundColor: Colors.backgroundSecondary, + borderRadius: BorderRadius.small, + padding: Padding.padding12, + ...Typography.body, + color: Colors.textPrimary, + borderWidth: 1, + borderColor: Colors.borderLight, + minHeight: 80, + }, + saveSettingsButton: { + flexDirection: 'row', + alignItems: 'center', + justifyContent: 'center', + gap: Spacing.small, + backgroundColor: Colors.primaryBlue, + padding: Padding.padding16, + margin: Padding.padding16, + borderRadius: BorderRadius.small, + }, + saveSettingsButtonText: { + ...Typography.body, + color: Colors.textWhite, + fontWeight: '600', + }, }); export default SettingsScreen; diff --git a/examples/react-native/RunAnywhereAI/src/screens/TTSScreen.tsx b/examples/react-native/RunAnywhereAI/src/screens/TTSScreen.tsx index 2b98d970b..5e18afc8d 100644 --- a/examples/react-native/RunAnywhereAI/src/screens/TTSScreen.tsx +++ 
b/examples/react-native/RunAnywhereAI/src/screens/TTSScreen.tsx @@ -1012,7 +1012,8 @@ export const TTSScreen: React.FC = () => { Voice Settings {renderSlider('Speed', speed, setSpeed)} - {renderSlider('Pitch', pitch, setPitch)} + {/* Pitch slider - Commented out for now as it is not implemented in the current TTS models */} + {/* {renderSlider('Pitch', pitch, setPitch)} */} {renderSlider( 'Volume', volume, diff --git a/examples/react-native/RunAnywhereAI/src/types/settings.ts b/examples/react-native/RunAnywhereAI/src/types/settings.ts index ac95be54a..e51a13b4c 100644 --- a/examples/react-native/RunAnywhereAI/src/types/settings.ts +++ b/examples/react-native/RunAnywhereAI/src/types/settings.ts @@ -123,3 +123,13 @@ export const RoutingPolicyDescriptions: Record = { [RoutingPolicy.PreferCloud]: 'Prefer cloud execution, fall back to device if offline.', }; + +/** + * AsyncStorage keys for generation settings persistence + * Matches iOS/Android naming convention for cross-platform consistency + */ +export const GENERATION_SETTINGS_KEYS = { + TEMPERATURE: 'defaultTemperature', + MAX_TOKENS: 'defaultMaxTokens', + SYSTEM_PROMPT: 'defaultSystemPrompt', +} as const; diff --git a/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.cpp b/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.cpp index dbb75ae09..27eaa8c7b 100644 --- a/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.cpp +++ b/sdk/runanywhere-react-native/packages/core/cpp/HybridRunAnywhereCore.cpp @@ -1411,15 +1411,18 @@ std::shared_ptr> HybridRunAnywhereCore::generate( // Parse options int maxTokens = 256; float temperature = 0.7f; + std::string systemPrompt; if (optionsJson.has_value()) { maxTokens = extractIntValue(optionsJson.value(), "max_tokens", 256); temperature = static_cast(extractDoubleValue(optionsJson.value(), "temperature", 0.7)); + systemPrompt = extractStringValue(optionsJson.value(), "system_prompt", ""); } rac_llm_options_t options = {}; 
options.max_tokens = maxTokens; options.temperature = temperature; options.top_p = 0.9f; + options.system_prompt = systemPrompt.empty() ? nullptr : systemPrompt.c_str(); rac_llm_result_t llmResult = {}; rac_result_t result = rac_llm_component_generate(handle, prompt.c_str(), &options, &llmResult); @@ -1509,10 +1512,13 @@ std::shared_ptr> HybridRunAnywhereCore::generateStream( } // Parse options + std::string systemPrompt = extractStringValue(optionsJson, "system_prompt", ""); + rac_llm_options_t options = {}; options.max_tokens = extractIntValue(optionsJson, "max_tokens", 256); options.temperature = static_cast(extractDoubleValue(optionsJson, "temperature", 0.7)); options.top_p = 0.9f; + options.system_prompt = systemPrompt.empty() ? nullptr : systemPrompt.c_str(); // Create streaming context LLMStreamContext ctx; @@ -1585,14 +1591,17 @@ std::shared_ptr> HybridRunAnywhereCore::generateStructured( } // Generate with the prepared prompt + std::string systemPrompt; rac_llm_options_t options = {}; if (optionsJson.has_value()) { options.max_tokens = extractIntValue(optionsJson.value(), "max_tokens", 512); options.temperature = static_cast(extractDoubleValue(optionsJson.value(), "temperature", 0.7)); + systemPrompt = extractStringValue(optionsJson.value(), "system_prompt", ""); } else { options.max_tokens = 512; options.temperature = 0.7f; } + options.system_prompt = systemPrompt.empty() ? 
nullptr : systemPrompt.c_str(); rac_llm_result_t llmResult = {}; rac_result_t result = rac_llm_component_generate(handle, preparedPrompt, &options, &llmResult); From fba5ccc1ec668ed95165ce7c6c6ebdec97673eec Mon Sep 17 00:00:00 2001 From: VyasGuru <71374747+VyasGuru@users.noreply.github.com> Date: Sat, 14 Feb 2026 04:28:31 +0530 Subject: [PATCH 6/6] Fixed typos and Logging --- Package.swift | 2 +- .../presentation/chat/ChatViewModel.kt | 3 ++- .../settings/combined_settings_view.dart | 14 +++++++---- .../Chat/ViewModels/LLMViewModel.swift | 23 ++++++++++++------- .../Features/Voice/TTSViewModel.swift | 2 +- .../Features/Voice/TextToSpeechView.swift | 2 +- 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/Package.swift b/Package.swift index cb3eb08e9..274b56fa0 100644 --- a/Package.swift +++ b/Package.swift @@ -40,7 +40,7 @@ let onnxRuntimeMacOSPath = "\(packageDir)/sdk/runanywhere-swift/Binaries/onnxrun // ./scripts/build-swift.sh --set-remote (sets useLocalBinaries = false) // // ============================================================================= -let useLocalBinaries = true // Toggle: true for local dev, false for release +let useLocalBinaries = false // Toggle: true for local dev, false for release // Version for remote XCFrameworks (used when testLocal = false) // Updated automatically by CI/CD during releases diff --git a/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/chat/ChatViewModel.kt b/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/chat/ChatViewModel.kt index f0ad86875..52edb75a7 100644 --- a/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/chat/ChatViewModel.kt +++ b/examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/chat/ChatViewModel.kt @@ -858,8 +858,9 @@ class ChatViewModel(application: Application) : AndroidViewModel(application) { val maxTokens = 
generationPrefs.getInt("defaultMaxTokens", 1000) val systemPromptValue = generationPrefs.getString("defaultSystemPrompt", "") val systemPrompt = if (systemPromptValue.isNullOrEmpty()) null else systemPromptValue + val systemPromptInfo = systemPrompt?.let { "set(${it.length} chars)" } ?: "nil" - Log.i(TAG, "[PARAMS] App getGenerationOptions: temperature=$temperature, maxTokens=$maxTokens, systemPrompt=${systemPrompt ?: "nil"}") + Log.i(TAG, "[PARAMS] App getGenerationOptions: temperature=$temperature, maxTokens=$maxTokens, systemPrompt=$systemPromptInfo") return com.runanywhere.sdk.public.extensions.LLM.LLMGenerationOptions( maxTokens = maxTokens, diff --git a/examples/flutter/RunAnywhereAI/lib/features/settings/combined_settings_view.dart b/examples/flutter/RunAnywhereAI/lib/features/settings/combined_settings_view.dart index 2b7a44891..9608002f0 100644 --- a/examples/flutter/RunAnywhereAI/lib/features/settings/combined_settings_view.dart +++ b/examples/flutter/RunAnywhereAI/lib/features/settings/combined_settings_view.dart @@ -44,6 +44,7 @@ class _CombinedSettingsViewState extends State { double _temperature = 0.7; int _maxTokens = 1000; String _systemPrompt = ''; + late final TextEditingController _systemPromptController; // Loading state bool _isRefreshingStorage = false; @@ -51,12 +52,19 @@ class _CombinedSettingsViewState extends State { @override void initState() { super.initState(); + _systemPromptController = TextEditingController(); unawaited(_loadSettings()); unawaited(_loadGenerationSettings()); unawaited(_loadApiConfiguration()); unawaited(_loadStorageData()); } + @override + void dispose() { + _systemPromptController.dispose(); + super.dispose(); + } + Future _loadSettings() async { // Load from keychain _analyticsLogToLocal = @@ -74,6 +82,7 @@ class _CombinedSettingsViewState extends State { _temperature = prefs.getDouble(PreferenceKeys.defaultTemperature) ?? 0.7; _maxTokens = prefs.getInt(PreferenceKeys.defaultMaxTokens) ?? 
1000; _systemPrompt = prefs.getString(PreferenceKeys.defaultSystemPrompt) ?? ''; + _systemPromptController.text = _systemPrompt; }); } } @@ -579,10 +588,7 @@ class _CombinedSettingsViewState extends State { Text('System Prompt', style: AppTypography.subheadline(context)), const SizedBox(height: AppSpacing.xSmall), TextField( - controller: TextEditingController(text: _systemPrompt) - ..selection = TextSelection.fromPosition( - TextPosition(offset: _systemPrompt.length), - ), + controller: _systemPromptController, maxLines: 3, decoration: const InputDecoration( hintText: 'Enter a system prompt...', diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift index 213e45bda..f68677cc0 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift @@ -333,14 +333,21 @@ final class LLMViewModel { let effectiveSystemPrompt = (savedSystemPrompt?.isEmpty == false) ? savedSystemPrompt : nil - logger.info("[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), systemPrompt=\(effectiveSystemPrompt ?? 
"nil")") - - return LLMGenerationOptions( - maxTokens: effectiveSettings.maxTokens, - temperature: Float(effectiveSettings.temperature), - systemPrompt: effectiveSystemPrompt - ) - } + let systemPromptInfo: String = { + guard let prompt = effectiveSystemPrompt else { return "nil" } + return "set(\(prompt.count) chars)" + }() + + logger.info( + "[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), systemPrompt=\(systemPromptInfo)" + ) + + return LLMGenerationOptions( + maxTokens: effectiveSettings.maxTokens, + temperature: Float(effectiveSettings.temperature), + systemPrompt: effectiveSystemPrompt + ) +} // MARK: - Internal Methods - Helpers diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TTSViewModel.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TTSViewModel.swift index 22368b501..16fe78dba 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TTSViewModel.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TTSViewModel.swift @@ -26,7 +26,7 @@ class TTSViewModel: ObservableObject { // Voice Settings @Published var speechRate: Double = 1.0 - @Published var pitch: Double = 1.0 // while removed from the UI, the backend still supports pitch, so maintinaining it here. + @Published var pitch: Double = 1.0 // while removed from the UI, the backend still supports pitch, so maintaining it here. 
// MARK: - Private Properties diff --git a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TextToSpeechView.swift b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TextToSpeechView.swift index 4b7222ad0..d47e5f30b 100644 --- a/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TextToSpeechView.swift +++ b/examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/TextToSpeechView.swift @@ -237,7 +237,7 @@ struct TextToSpeechView: View { // TODO: Find a model for TTS that supports pitch, or manually implement a good quality pitch adjustment // Pitch (not implemented in the current TTS models. Once supported, we can have this back.) - // VStack(alignment: .leading, spacing: 10) {claud + // VStack(alignment: .leading, spacing: 10) { // HStack { // Text("Pitch") // .font(.subheadline)