diff --git a/method_comparison/MetaMathQA/results/delora--llama-3.2-3B-rank32.json b/method_comparison/MetaMathQA/results/delora--llama-3.2-3B-rank32.json
new file mode 100644
index 0000000000..317ae09c61
--- /dev/null
+++ b/method_comparison/MetaMathQA/results/delora--llama-3.2-3B-rank32.json
@@ -0,0 +1,354 @@
+{
+  "run_info": {
+    "created_at": "2025-10-23T16:18:17+00:00",
+    "total_time": 2331.184612270001,
+    "experiment_name": "delora/llama-3.2-3B-rank32",
+    "peft_branch": "main",
+    "train_config": {
+      "model_id": "meta-llama/Llama-3.2-3B",
+      "dtype": "bfloat16",
+      "max_seq_length": 768,
+      "batch_size": 4,
+      "batch_size_eval": 50,
+      "max_steps": 5000,
+      "eval_steps": 250,
+      "compile": false,
+      "query_template": "Question: {query} Think step by step.\nAnswer:",
+      "seed": 0,
+      "grad_norm_clip": 1.0,
+      "optimizer_type": "AdamW",
+      "optimizer_kwargs": {
+        "lr": 0.001
+      },
+      "lr_scheduler": "cosine",
+      "use_amp": false,
+      "autocast_adapter_dtype": true,
+      "generation_kwargs": {
+        "max_length": 800,
+        "max_new_tokens": 300
+      },
+      "attn_implementation": null
+    },
+    "peft_config": {
+      "task_type": "CAUSAL_LM",
+      "peft_type": "DELORA",
+      "auto_mapping": null,
+      "peft_version": "0.17.2.dev0@UNKNOWN",
+      "base_model_name_or_path": "meta-llama/Llama-3.2-3B",
+      "revision": null,
+      "inference_mode": false,
+      "r": 32,
+      "delora_lambda": 15,
+      "module_dropout": 0.0,
+      "target_modules": [
+        "q_proj",
+        "v_proj"
+      ],
+      "exclude_modules": null,
+      "bias": "none",
+      "init_weights": true,
+      "layers_to_transform": null,
+      "layers_pattern": null,
+      "rank_pattern": {},
+      "lambda_pattern": {},
+      "modules_to_save": null
+    },
+    "error_msg": ""
+  },
+  "train_info": {
+    "accelerator_memory_reserved_avg": 11956236845,
+    "accelerator_memory_max": 22361931776,
+    "accelerator_memory_reserved_99th": 17769252782,
+    "train_time": 2063.197599866002,
+    "file_size": 37417520,
+    "num_trainable_params": 9175096,
+    "num_total_params": 3221924920,
+    "status": "success",
+    "metrics": [
+      {
+        "step": 250,
+        "valid accuracy": 0.32,
+        "train loss": 0.7512386105060578,
+        "train samples": 1000,
+        "train time": 37.84413140498509,
+        "eval time": 13.205585324998538,
+        "tokens / sec": 5594.500181132732,
+        "mem allocated avg": 6926794532.864,
+        "mem reserved avg": 12007369605.12,
+        "elapsed time": 112.85904153599768
+      },
+      {
+        "step": 500,
+        "valid accuracy": 0.38,
+        "train loss": 0.7050024774074555,
+        "train samples": 2000,
+        "train time": 37.53846677497859,
+        "eval time": 13.265299970000342,
+        "tokens / sec": 5540.849636902056,
+        "mem allocated avg": 6919349673.984,
+        "mem reserved avg": 11903770296.32,
+        "elapsed time": 212.84601919299894
+      },
+      {
+        "step": 750,
+        "valid accuracy": 0.32,
+        "train loss": 0.6706294032335282,
+        "train samples": 3000,
+        "train time": 37.80458352702772,
+        "eval time": 13.272025713999028,
+        "tokens / sec": 5671.29644072703,
+        "mem allocated avg": 6929633923.072,
+        "mem reserved avg": 12056694620.16,
+        "elapsed time": 313.49587832399993
+      },
+      {
+        "step": 1000,
+        "valid accuracy": 0.4,
+        "train loss": 0.6481547034978866,
+        "train samples": 4000,
+        "train time": 37.52610543700939,
+        "eval time": 13.21725967599923,
+        "tokens / sec": 5551.761835496328,
+        "mem allocated avg": 6919568891.904,
+        "mem reserved avg": 11917057851.392,
+        "elapsed time": 413.16383353999845
+      },
+      {
+        "step": 1250,
+        "valid accuracy": 0.38,
+        "train loss": 0.6453099972009659,
+        "train samples": 5000,
+        "train time": 37.5804522819999,
+        "eval time": 9.624667924999812,
+        "tokens / sec": 5549.108308626837,
+        "mem allocated avg": 6921147688.96,
+        "mem reserved avg": 11914943922.176,
+        "elapsed time": 509.47617638500014
+      },
+      {
+        "step": 1500,
+        "valid accuracy": 0.46,
+        "train loss": 0.6384247626066208,
+        "train samples": 6000,
+        "train time": 37.65730221097692,
+        "eval time": 9.775350372998219,
+        "tokens / sec": 5558.842182246954,
+        "mem allocated avg": 6921056847.872,
+        "mem reserved avg": 11953078534.144,
+        "elapsed time": 606.1567662300004
+      },
+      {
+        "step": 1750,
+        "valid accuracy": 0.48,
+        "train loss": 0.6297660274505615,
+        "train samples": 7000,
+        "train time": 37.82186047102368,
+        "eval time": 7.911249515000236,
+        "tokens / sec": 5535.290897717534,
+        "mem allocated avg": 6923910838.272,
+        "mem reserved avg": 11956249427.968,
+        "elapsed time": 701.1174360119985
+      },
+      {
+        "step": 2000,
+        "valid accuracy": 0.5,
+        "train loss": 0.6332990030050277,
+        "train samples": 8000,
+        "train time": 37.523248280005646,
+        "eval time": 8.530133835996821,
+        "tokens / sec": 5535.128474223041,
+        "mem allocated avg": 6920641826.816,
+        "mem reserved avg": 11907327066.112,
+        "elapsed time": 796.1569609649996
+      },
+      {
+        "step": 2250,
+        "valid accuracy": 0.4,
+        "train loss": 0.6243826431035996,
+        "train samples": 9000,
+        "train time": 38.08898475294336,
+        "eval time": 13.285918199999287,
+        "tokens / sec": 5643.311350885762,
+        "mem allocated avg": 6931386861.568,
+        "mem reserved avg": 12094938284.032,
+        "elapsed time": 897.2201951069983
+      },
+      {
+        "step": 2500,
+        "valid accuracy": 0.5,
+        "train loss": 0.6215927278995514,
+        "train samples": 10000,
+        "train time": 37.63880846399843,
+        "eval time": 13.24860273900049,
+        "tokens / sec": 5472.1976705773695,
+        "mem allocated avg": 6917278386.176,
+        "mem reserved avg": 11845175869.44,
+        "elapsed time": 998.0728250969987
+      },
+      {
+        "step": 2750,
+        "valid accuracy": 0.42,
+        "train loss": 0.6130854382514953,
+        "train samples": 11000,
+        "train time": 37.79084398697523,
+        "eval time": 13.198808683002426,
+        "tokens / sec": 5606.675523653974,
+        "mem allocated avg": 6926927112.192,
+        "mem reserved avg": 12020548108.288,
+        "elapsed time": 1098.4325272319984
+      },
+      {
+        "step": 3000,
+        "valid accuracy": 0.46,
+        "train loss": 0.604831589102745,
+        "train samples": 12000,
+        "train time": 37.568779274977715,
+        "eval time": 10.355002560001594,
+        "tokens / sec": 5555.969718159649,
+        "mem allocated avg": 6922721505.28,
+        "mem reserved avg": 11937609940.992,
+        "elapsed time": 1195.2514979959997
+      },
+      {
+        "step": 3250,
+        "valid accuracy": 0.4,
+        "train loss": 0.6124310380220414,
+        "train samples": 13000,
+        "train time": 37.70235535401662,
+        "eval time": 10.490295633000642,
+        "tokens / sec": 5593.841499282662,
+        "mem allocated avg": 6924630044.672,
+        "mem reserved avg": 11975081852.928,
+        "elapsed time": 1292.7081366849998
+      },
+      {
+        "step": 3500,
+        "valid accuracy": 0.54,
+        "train loss": 0.5956783784627915,
+        "train samples": 14000,
+        "train time": 37.79015436899135,
+        "eval time": 7.505472221000673,
+        "tokens / sec": 5550.387488549399,
+        "mem allocated avg": 6923355121.664,
+        "mem reserved avg": 11948884230.144,
+        "elapsed time": 1387.1216009819982
+      },
+      {
+        "step": 3750,
+        "valid accuracy": 0.48,
+        "train loss": 0.5921734108924865,
+        "train samples": 15000,
+        "train time": 37.99711803697937,
+        "eval time": 8.399906407001254,
+        "tokens / sec": 5703.143059142048,
+        "mem allocated avg": 6933243086.848,
+        "mem reserved avg": 12128694042.624,
+        "elapsed time": 1483.2807508709993
+      },
+      {
+        "step": 4000,
+        "valid accuracy": 0.52,
+        "train loss": 0.6020598074197769,
+        "train samples": 16000,
+        "train time": 37.42554273099813,
+        "eval time": 13.19645261199912,
+        "tokens / sec": 5460.78921203528,
+        "mem allocated avg": 6915014187.008,
+        "mem reserved avg": 11819355734.016,
+        "elapsed time": 1582.7408143280009
+      },
+      {
+        "step": 4250,
+        "valid accuracy": 0.5,
+        "train loss": 0.58726664686203,
+        "train samples": 17000,
+        "train time": 37.58307892599987,
+        "eval time": 9.69436509300067,
+        "tokens / sec": 5624.579093592081,
+        "mem allocated avg": 6926118213.632,
+        "mem reserved avg": 11987807371.264,
+        "elapsed time": 1679.2568312559997
+      },
+      {
+        "step": 4500,
+        "valid accuracy": 0.52,
+        "train loss": 0.5931945472955704,
+        "train samples": 18000,
+        "train time": 37.45943218199682,
+        "eval time": 7.795902468998975,
+        "tokens / sec": 5547.815006653474,
+        "mem allocated avg": 6920348925.952,
+        "mem reserved avg": 11897596280.832,
+        "elapsed time": 1773.5582212900008
+      },
+      {
+        "step": 4750,
+        "valid accuracy": 0.5,
+        "train loss": 0.5837668641805649,
+        "train samples": 19000,
+        "train time": 37.71794232197135,
+        "eval time": 10.624573600001895,
+        "tokens / sec": 5566.024737190049,
+        "mem allocated avg": 6922591481.856,
+        "mem reserved avg": 11951140765.696,
+        "elapsed time": 1871.3457676430007
+      },
+      {
+        "step": 5000,
+        "valid accuracy": 0.52,
+        "train loss": 0.5912798082828522,
+        "train samples": 20000,
+        "train time": 37.50696286400489,
+        "eval time": 9.267422332999558,
+        "tokens / sec": 5553.1022534454405,
+        "mem allocated avg": 6919856828.416,
+        "mem reserved avg": 11901413097.472,
+        "elapsed time": 1967.2812061679979
+      },
+      {
+        "step": 5000,
+        "test accuracy": 0.5056861258529188,
+        "train loss": 0.5912798082828522,
+        "train samples": 20000,
+        "train total tokens": 4198051
+      }
+    ]
+  },
+  "meta_info": {
+    "model_info": {
+      "sha": "13afe5124825b4f3751f836b40dafda64c1ed062",
+      "created_at": "2024-09-18T15:23:48+00:00"
+    },
+    "dataset_info": {
+      "metamath": {
+        "sha": "aa4f34d3d2d3231299b5b03d9b3e5a20da45aa18",
+        "created_at": "2023-09-21T17:22:46+00:00"
+      },
+      "gsm8k": {
+        "sha": "e53f048856ff4f594e959d75785d2c2d37b678ee",
+        "created_at": "2022-04-12T10:22:10+00:00"
+      }
+    },
+    "package_info": {
+      "transformers-version": "4.57.1",
+      "transformers-commit-hash": null,
+      "peft-version": "0.17.2.dev0",
+      "peft-commit-hash": "a18ba67f242ab2eb74cdabab76ea2fd836b5cd83",
+      "datasets-version": "4.2.0",
+      "datasets-commit-hash": null,
+      "bitsandbytes-version": "0.46.0",
+      "bitsandbytes-commit-hash": null,
+      "torch-version": "2.9.0+cu128",
+      "torch-commit-hash": null
+    },
+    "system_info": {
+      "system": "Linux",
+      "release": "6.14.0-1014-aws",
+      "version": "#14~24.04.1-Ubuntu SMP Tue Sep 23 14:51:14 UTC 2025",
+      "machine": "x86_64",
+      "processor": "x86_64",
+      "accelerator": "NVIDIA L40S"
+    },
+    "pytorch_info": "PyTorch built with:\n  - GCC 13.3\n  - C++ Version: 201703\n  - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 12.8\n  - NVCC architecture flags: -gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_100,code=sm_100;-gencode;arch=compute_120,code=sm_120\n  - CuDNN 90.7.1\n    - Built with CuDNN 90.8\n  - Magma 2.6.1\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=0fabc3ba44823f257e70ce397d989c8de5e362c1, CUDA_VERSION=12.8, CUDNN_VERSION=9.8.0, CXX_COMPILER=/opt/rh/gcc-toolset-13/root/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -DC10_NODEPRECATED -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-dangling-reference -Wno-error=dangling-reference -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, USE_XCCL=OFF, USE_XPU=OFF, \n"
+  }
+}
\ No newline at end of file
diff --git a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank10-target-mlp.json b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank10-target-mlp.json
new file mode 100644
index 0000000000..7fdd1804b7
--- /dev/null
+++ b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank10-target-mlp.json
@@ -0,0 +1,373 @@
+{
+  "run_info": {
+    "created_at": "2025-10-23T16:57:13+00:00",
+    "total_time": 2248.6777099889987,
+    "experiment_name": "lora/llama-3.2-3B-rank10-target-mlp",
+    "peft_branch": "main",
+    "train_config": {
+      "model_id": "meta-llama/Llama-3.2-3B",
+      "dtype": "bfloat16",
+      "max_seq_length": 768,
+      "batch_size": 4,
+      "batch_size_eval": 50,
+      "max_steps": 5000,
+      "eval_steps": 250,
+      "compile": false,
+      "query_template": "Question: {query} Think step by step.\nAnswer:",
+      "seed": 0,
+      "grad_norm_clip": 1.0,
+      "optimizer_type": "AdamW",
+      "optimizer_kwargs": {
+        "lr": 0.0001,
+        "weight_decay": 0.1
+      },
+      "lr_scheduler": "cosine",
+      "use_amp": false,
+      "autocast_adapter_dtype": true,
+      "generation_kwargs": {
+        "max_length": 800,
+        "max_new_tokens": 300
+      },
+      "attn_implementation": null
+    },
+    "peft_config": {
+      "task_type": "CAUSAL_LM",
+      "peft_type": "LORA",
+      "auto_mapping": null,
+      "peft_version": "0.17.2.dev0@UNKNOWN",
+      "base_model_name_or_path": "meta-llama/Llama-3.2-3B",
+      "revision": null,
+      "inference_mode": false,
+      "r": 10,
+      "target_modules": [
+        "down_proj",
+        "up_proj",
+        "gate_proj"
+      ],
+      "exclude_modules": null,
+      "lora_alpha": 20,
+      "lora_dropout": 0.0,
+      "fan_in_fan_out": false,
+      "bias": "none",
+      "use_rslora": false,
+      "modules_to_save": null,
+      "init_lora_weights": true,
+      "layers_to_transform": null,
+      "layers_pattern": null,
+      "rank_pattern": {},
+      "alpha_pattern": {},
+      "megatron_config": null,
+      "megatron_core": "megatron.core",
+      "trainable_token_indices": null,
+      "loftq_config": {},
+      "eva_config": null,
+      "corda_config": null,
+      "use_dora": false,
+      "alora_invocation_tokens": null,
+      "use_qalora": false,
+      "qalora_group_size": 16,
+      "layer_replication": null,
+      "lora_bias": false,
+      "target_parameters": null,
+      "arrow_config": null,
+      "ensure_weight_tying": false
+    },
+    "error_msg": ""
+  },
+  "train_info": {
+    "accelerator_memory_reserved_avg": 12694032954,
+    "accelerator_memory_max": 24712839168,
+    "accelerator_memory_reserved_99th": 19381941698,
+    "train_time": 2051.9836875680085,
+    "file_size": 37868984,
+    "num_trainable_params": 9461760,
+    "num_total_params": 3222211584,
+    "status": "success",
+    "metrics": [
+      {
+        "step": 250,
+        "valid accuracy": 0.3,
+        "train loss": 0.9570077481269836,
+        "train samples": 1000,
+        "train time": 36.60406935900755,
+        "eval time": 11.010158622000745,
+        "tokens / sec": 5784.029035774408,
+        "mem allocated avg": 6935100323.84,
+        "mem reserved avg": 12750096957.44,
+        "elapsed time": 110.24424074899798
+      },
+      {
+        "step": 500,
+        "valid accuracy": 0.34,
+        "train loss": 0.6960296107530594,
+        "train samples": 2000,
+        "train time": 36.118105334990105,
+        "eval time": 12.692174800999055,
+        "tokens / sec": 5758.74614880479,
+        "mem allocated avg": 6925969729.536,
+        "mem reserved avg": 12626524372.992,
+        "elapsed time": 209.50398311299796
+      },
+      {
+        "step": 750,
+        "valid accuracy": 0.34,
+        "train loss": 0.6628359504938126,
+        "train samples": 3000,
+        "train time": 36.51641339201524,
+        "eval time": 7.421836968998832,
+        "tokens / sec": 5871.359755360898,
+        "mem allocated avg": 6938496946.176,
+        "mem reserved avg": 12801720451.072,
+        "elapsed time": 304.07620789499924
+      },
+      {
+        "step": 1000,
+        "valid accuracy": 0.44,
+        "train loss": 0.6412760821580887,
+        "train samples": 4000,
+        "train time": 36.199786640008824,
+        "eval time": 12.675621071000933,
+        "tokens / sec": 5755.172042084423,
+        "mem allocated avg": 6928097030.144,
+        "mem reserved avg": 12653737017.344,
+        "elapsed time": 402.97702367999955
+      },
+      {
+        "step": 1250,
+        "valid accuracy": 0.36,
+        "train loss": 0.6370910699367524,
+        "train samples": 5000,
+        "train time": 36.12392316597834,
+        "eval time": 7.926442895997752,
+        "tokens / sec": 5772.850281012721,
+        "mem allocated avg": 6928575430.656,
+        "mem reserved avg": 12650138304.512,
+        "elapsed time": 497.5677833490008
+      },
+      {
+        "step": 1500,
+        "valid accuracy": 0.54,
+        "train loss": 0.6281076629161835,
+        "train samples": 6000,
+        "train time": 36.30386043796898,
+        "eval time": 7.837062710997998,
+        "tokens / sec": 5766.081002808941,
+        "mem allocated avg": 6928918276.096,
+        "mem reserved avg": 12675237019.648,
+        "elapsed time": 592.1144442160003
+      },
+      {
+        "step": 1750,
+        "valid accuracy": 0.36,
+        "train loss": 0.6193128414154053,
+        "train samples": 7000,
+        "train time": 36.462194165000255,
+        "eval time": 12.642417379000108,
+        "tokens / sec": 5741.700542008469,
+        "mem allocated avg": 6929703954.432,
+        "mem reserved avg": 12699194884.096,
+        "elapsed time": 691.625672238999
+      },
+      {
+        "step": 2000,
+        "valid accuracy": 0.4,
+        "train loss": 0.6216564847230911,
+        "train samples": 8000,
+        "train time": 36.21432654597447,
+        "eval time": 7.853862869000295,
+        "tokens / sec": 5735.1887998338925,
+        "mem allocated avg": 6928315158.528,
+        "mem reserved avg": 12636641034.24,
+        "elapsed time": 785.9735525059987
+      },
+      {
+        "step": 2250,
+        "valid accuracy": 0.48,
+        "train loss": 0.6131566362380981,
+        "train samples": 9000,
+        "train time": 36.94326955001088,
+        "eval time": 7.9556675359999645,
+        "tokens / sec": 5818.3263857851125,
+        "mem allocated avg": 6937815918.592,
+        "mem reserved avg": 12847388033.024,
+        "elapsed time": 881.850712686999
+      },
+      {
+        "step": 2500,
+        "valid accuracy": 0.48,
+        "train loss": 0.6090103325843811,
+        "train samples": 10000,
+        "train time": 35.94672909302972,
+        "eval time": 7.34414544700121,
+        "tokens / sec": 5729.784188902412,
+        "mem allocated avg": 6922815086.592,
+        "mem reserved avg": 12567728619.52,
+        "elapsed time": 975.391175255998
+      },
+      {
+        "step": 2750,
+        "valid accuracy": 0.54,
+        "train loss": 0.5998001435995102,
+        "train samples": 11000,
+        "train time": 36.57762499699311,
+        "eval time": 8.164194213000883,
+        "tokens / sec": 5792.6396264770565,
+        "mem allocated avg": 6934571272.192,
+        "mem reserved avg": 12769894072.32,
+        "elapsed time": 1070.6943081120007
+      },
+      {
+        "step": 3000,
+        "valid accuracy": 0.38,
+        "train loss": 0.5910915687084198,
+        "train samples": 12000,
+        "train time": 36.188985478995164,
+        "eval time": 12.692775247996906,
+        "tokens / sec": 5767.80468524468,
+        "mem allocated avg": 6928919042.048,
+        "mem reserved avg": 12673836122.112,
+        "elapsed time": 1169.909223751998
+      },
+      {
+        "step": 3250,
+        "valid accuracy": 0.48,
+        "train loss": 0.5988883073329926,
+        "train samples": 13000,
+        "train time": 36.27180437299103,
+        "eval time": 8.65234920200237,
+        "tokens / sec": 5814.461222586507,
+        "mem allocated avg": 6930257350.656,
+        "mem reserved avg": 12713606512.64,
+        "elapsed time": 1265.7605457559985
+      },
+      {
+        "step": 3500,
+        "valid accuracy": 0.56,
+        "train loss": 0.5822008575201034,
+        "train samples": 14000,
+        "train time": 36.417341429965745,
+        "eval time": 12.674946688999626,
+        "tokens / sec": 5759.618680659888,
+        "mem allocated avg": 6928901287.936,
+        "mem reserved avg": 12692374945.792,
+        "elapsed time": 1365.5030611420007
+      },
+      {
+        "step": 3750,
+        "valid accuracy": 0.6,
+        "train loss": 0.5811240552663803,
+        "train samples": 15000,
+        "train time": 36.86457888804216,
+        "eval time": 12.64913076099765,
+        "tokens / sec": 5878.352785695116,
+        "mem allocated avg": 6940823429.12,
+        "mem reserved avg": 12892720070.656,
+        "elapsed time": 1465.9017574809986
+      },
+      {
+        "step": 4000,
+        "valid accuracy": 0.52,
+        "train loss": 0.5901038019657135,
+        "train samples": 16000,
+        "train time": 36.045212401033496,
+        "eval time": 12.659105679998902,
+        "tokens / sec": 5669.906941487191,
+        "mem allocated avg": 6920815292.416,
+        "mem reserved avg": 12551395999.744,
+        "elapsed time": 1564.9818188249992
+      },
+      {
+        "step": 4250,
+        "valid accuracy": 0.48,
+        "train loss": 0.5774346487522125,
+        "train samples": 17000,
+        "train time": 36.271750094994786,
+        "eval time": 7.057205803001125,
+        "tokens / sec": 5827.923920030812,
+        "mem allocated avg": 6933466675.2,
+        "mem reserved avg": 12724444594.176,
+        "elapsed time": 1659.218996085001
+      },
+      {
+        "step": 4500,
+        "valid accuracy": 0.52,
+        "train loss": 0.5839375752210617,
+        "train samples": 18000,
+        "train time": 36.09228529396205,
+        "eval time": 7.738268649998645,
+        "tokens / sec": 5757.961800073832,
+        "mem allocated avg": 6927241906.176,
+        "mem reserved avg": 12634778763.264,
+        "elapsed time": 1753.44311027
+      },
+      {
+        "step": 4750,
+        "valid accuracy": 0.5,
+        "train loss": 0.5752255419492721,
+        "train samples": 19000,
+        "train time": 36.37206586197499,
+        "eval time": 7.730858285998693,
+        "tokens / sec": 5771.984489324259,
+        "mem allocated avg": 6928856346.624,
+        "mem reserved avg": 12691150209.024,
+        "elapsed time": 1848.2997361499984
+      },
+      {
+        "step": 5000,
+        "valid accuracy": 0.52,
+        "train loss": 0.5811339800357819,
+        "train samples": 20000,
+        "train time": 36.14202177399784,
+        "eval time": 8.250298782000755,
+        "tokens / sec": 5762.820942956926,
+        "mem allocated avg": 6926857431.04,
+        "mem reserved avg": 12628051099.648,
+        "elapsed time": 1943.1900490109983
+      },
+      {
+        "step": 5000,
+        "test accuracy": 0.5261561789234268,
+        "train loss": 0.5811339800357819,
+        "train samples": 20000,
+        "train total tokens": 4198051
+      }
+    ]
+  },
+  "meta_info": {
+    "model_info": {
+      "sha": "13afe5124825b4f3751f836b40dafda64c1ed062",
+      "created_at": "2024-09-18T15:23:48+00:00"
+    },
+    "dataset_info": {
+      "metamath": {
+        "sha": "aa4f34d3d2d3231299b5b03d9b3e5a20da45aa18",
+        "created_at": "2023-09-21T17:22:46+00:00"
+      },
+      "gsm8k": {
+        "sha": "e53f048856ff4f594e959d75785d2c2d37b678ee",
+        "created_at": "2022-04-12T10:22:10+00:00"
+      }
+    },
+    "package_info": {
+      "transformers-version": "4.57.1",
+      "transformers-commit-hash": null,
+      "peft-version": "0.17.2.dev0",
+      "peft-commit-hash": "a18ba67f242ab2eb74cdabab76ea2fd836b5cd83",
+      "datasets-version": "4.2.0",
+      "datasets-commit-hash": null,
+      "bitsandbytes-version": "0.46.0",
+      "bitsandbytes-commit-hash": null,
+      "torch-version": "2.9.0+cu128",
+      "torch-commit-hash": null
+    },
+    "system_info": {
+      "system": "Linux",
+      "release": "6.14.0-1014-aws",
+      "version": "#14~24.04.1-Ubuntu SMP Tue Sep 23 14:51:14 UTC 2025",
+      "machine": "x86_64",
+      "processor": "x86_64",
+      "accelerator": "NVIDIA L40S"
+    },
+    "pytorch_info": "PyTorch built with:\n  - GCC 13.3\n  - C++ Version: 201703\n  - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 12.8\n  - NVCC architecture flags: -gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_100,code=sm_100;-gencode;arch=compute_120,code=sm_120\n  - CuDNN 90.7.1\n    - Built with CuDNN 90.8\n  - Magma 2.6.1\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=0fabc3ba44823f257e70ce397d989c8de5e362c1, CUDA_VERSION=12.8, CUDNN_VERSION=9.8.0, CXX_COMPILER=/opt/rh/gcc-toolset-13/root/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -DC10_NODEPRECATED -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-dangling-reference -Wno-error=dangling-reference -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, USE_XCCL=OFF, USE_XPU=OFF, \n"
+  }
+}
\ No newline at end of file
diff --git a/method_comparison/MetaMathQA/results/oft--llama-3.2-3B-rank32.json b/method_comparison/MetaMathQA/results/oft--llama-3.2-3B-rank32.json
index b57f300fa3..0b7741e369 100644
--- a/method_comparison/MetaMathQA/results/oft--llama-3.2-3B-rank32.json
+++ b/method_comparison/MetaMathQA/results/oft--llama-3.2-3B-rank32.json
@@ -1,7 +1,7 @@
 {
   "run_info": {
-    "created_at": "2025-07-31T14:11:12+00:00",
-    "total_time": 2493.9155955019996,
+    "created_at": "2025-10-23T17:34:45+00:00",
+    "total_time": 2374.6856670790003,
     "experiment_name": "oft/llama-3.2-3B-rank32",
     "peft_branch": "main",
     "train_config": {
@@ -34,6 +34,7 @@
       "task_type": null,
       "peft_type": "OFT",
       "auto_mapping": null,
+      "peft_version": "0.17.2.dev0@UNKNOWN",
       "base_model_name_or_path": "meta-llama/Llama-3.2-3B",
       "revision": null,
       "inference_mode": false,
@@ -60,10 +61,10 @@
     "error_msg": ""
   },
   "train_info": {
-    "accelerator_memory_reserved_avg": 12057354384,
-    "accelerator_memory_max": 22294822912,
-    "accelerator_memory_reserved_99th": 17939310837,
-    "train_time": 2214.446992367006,
+    "accelerator_memory_reserved_avg": 12097176784,
+    "accelerator_memory_max": 22328377344,
+    "accelerator_memory_reserved_99th": 17958185205,
+    "train_time": 2166.5656557240145,
     "file_size": 32693568,
     "num_trainable_params": 8171520,
     "num_total_params": 3220921344,
@@ -72,247 +73,247 @@
       {
         "step": 250,
         "valid accuracy": 0.36,
-        "train loss": 0.9631057088375091,
+        "train loss": 0.9631274998188019,
         "train samples": 1000,
-        "train time": 43.418166981995455,
-        "eval time": 16.96007740999994,
-        "tokens / sec": 4876.276791873667,
-        "mem allocated avg": 6903823460.352,
-        "mem reserved avg": 12108561383.424,
-        "elapsed time": 113.91408998500083
+        "train time": 40.319602065053914,
+        "eval time": 14.108862943998247,
+        "tokens / sec": 5251.019086408657,
+        "mem allocated avg": 6909552105.472,
+        "mem reserved avg": 12148658929.664,
+        "elapsed time": 117.40419055000166
       },
       {
         "step": 500,
-        "valid accuracy": 0.36,
-        "train loss": 0.7144306401014328,
+        "valid accuracy": 0.3,
+        "train loss": 0.7145850785970688,
         "train samples": 2000,
-        "train time": 42.455775934988196,
-        "eval time": 16.150497423999695,
-        "tokens / sec": 4899.097835792689,
-        "mem allocated avg": 6896105342.976,
-        "mem reserved avg": 11994249822.208,
-        "elapsed time": 220.49977440600014
+        "train time": 39.82235778199902,
+        "eval time": 8.958179848999862,
+        "tokens / sec": 5223.07094769814,
+        "mem allocated avg": 6901974622.208,
+        "mem reserved avg": 12035630825.472,
+        "elapsed time": 217.32610749300147
       },
       {
         "step": 750,
-        "valid accuracy": 0.52,
-        "train loss": 0.6711842056512832,
+        "valid accuracy": 0.46,
+        "train loss": 0.6711596403121948,
         "train samples": 3000,
-        "train time": 43.15603912099323,
-        "eval time": 10.51256339000065,
-        "tokens / sec": 4968.041654585135,
-        "mem allocated avg": 6906686986.24,
-        "mem reserved avg": 12155101380.608,
-        "elapsed time": 322.5515955810006
+        "train time": 40.14594141800262,
+        "eval time": 8.506328391002171,
+        "tokens / sec": 5340.539851031025,
+        "mem allocated avg": 6912328740.864,
+        "mem reserved avg": 12194418786.304,
+        "elapsed time": 317.6191419630013
       },
       {
         "step": 1000,
         "valid accuracy": 0.48,
-        "train loss": 0.6508683092594146,
+        "train loss": 0.651293668627739,
         "train samples": 4000,
-        "train time": 42.42713372799517,
-        "eval time": 16.934662378998837,
-        "tokens / sec": 4910.442485595753,
-        "mem allocated avg": 6897939019.776,
-        "mem reserved avg": 12025262505.984,
-        "elapsed time": 429.7382754350001
+        "train time": 39.88486097396162,
+        "eval time": 9.90862209899933,
+        "tokens / sec": 5223.435531993199,
+        "mem allocated avg": 6903443197.952,
+        "mem reserved avg": 12063405506.56,
+        "elapsed time": 418.50864810500207
       },
       {
         "step": 1250,
-        "valid accuracy": 0.4,
-        "train loss": 0.6453732433319092,
+        "valid accuracy": 0.36,
+        "train loss": 0.6456290460824966,
         "train samples": 5000,
-        "train time": 42.549762738994104,
-        "eval time": 16.92903551499876,
-        "tokens / sec": 4901.03790423462,
-        "mem allocated avg": 6897900118.016,
-        "mem reserved avg": 12017234608.128,
-        "elapsed time": 537.135011331
+        "train time": 39.799740495029255,
+        "eval time": 10.214905517997977,
+        "tokens / sec": 5239.682405116313,
+        "mem allocated avg": 6904099018.752,
+        "mem reserved avg": 12058431062.016,
+        "elapsed time": 519.874058526002
       },
       {
         "step": 1500,
-        "valid accuracy": 0.5,
-        "train loss": 0.636857116818428,
+        "valid accuracy": 0.44,
+        "train loss": 0.6369200776815415,
         "train samples": 6000,
-        "train time": 42.7670685170051,
-        "eval time": 16.97714005600028,
-        "tokens / sec": 4894.677312679627,
-        "mem allocated avg": 6899436058.624,
-        "mem reserved avg": 12045822984.192,
-        "elapsed time": 644.8122739440005
+        "train time": 39.7944654230123,
+        "eval time": 9.540699907996895,
+        "tokens / sec": 5260.304360790541,
+        "mem allocated avg": 6905092661.248,
+        "mem reserved avg": 12085794701.312,
+        "elapsed time": 620.4396147330008
       },
       {
         "step": 1750,
-        "valid accuracy": 0.48,
-        "train loss": 0.6280697054862976,
+        "valid accuracy": 0.46,
+        "train loss": 0.6281714961528778,
         "train samples": 7000,
-        "train time": 42.93359049599712,
-        "eval time": 11.770931148001182,
-        "tokens / sec": 4876.251848060996,
-        "mem allocated avg": 6900382935.04,
-        "mem reserved avg": 12059630632.96,
-        "elapsed time": 747.525349122001
+        "train time": 39.897877080999024,
+        "eval time": 10.18648028700045,
+        "tokens / sec": 5247.271667486872,
+        "mem allocated avg": 6906448510.976,
+        "mem reserved avg": 12100340547.584,
+        "elapsed time": 721.9082820210024
       },
       {
         "step": 2000,
-        "valid accuracy": 0.4,
-        "train loss": 0.6299525223970414,
+        "valid accuracy": 0.42,
+        "train loss": 0.6302315661907196,
         "train samples": 8000,
-        "train time": 42.82682755300084,
-        "eval time": 11.5680384089992,
-        "tokens / sec": 4849.670448808364,
-        "mem allocated avg": 6896952041.472,
-        "mem reserved avg": 12003611508.736,
-        "elapsed time": 849.5279627600012
+        "train time": 39.71084841699121,
+        "eval time": 14.071537550997164,
+        "tokens / sec": 5230.20807359866,
+        "mem allocated avg": 6903141050.368,
+        "mem reserved avg": 12043474173.952,
+        "elapsed time": 826.8578335800012
       },
       {
         "step": 2250,
-        "valid accuracy": 0.42,
-        "train loss": 0.6208749743700027,
+        "valid accuracy": 0.44,
+        "train loss": 0.6209213199615479,
         "train samples": 9000,
-        "train time": 43.43083962600576,
-        "eval time": 16.986704689999897,
-        "tokens / sec": 4949.20203825146,
-        "mem allocated avg": 6908628027.392,
-        "mem reserved avg": 12188169273.344,
-        "elapsed time": 958.0240945160003
+        "train time": 40.21075651299543,
+        "eval time": 14.178777003002324,
+        "tokens / sec": 5345.534842910316,
+        "mem allocated avg": 6914497898.496,
+        "mem reserved avg": 12228820467.712,
+        "elapsed time": 933.0094860480021
       },
       {
         "step": 2500,
-        "valid accuracy": 0.42,
-        "train loss": 0.6179436918497085,
+        "valid accuracy": 0.44,
+        "train loss": 0.618088245511055,
         "train samples": 10000,
-        "train time": 42.63891591101674,
-        "eval time": 17.232789900999705,
-        "tokens / sec": 4830.493355643306,
-        "mem allocated avg": 6893492830.208,
-        "mem reserved avg": 11953867063.296,
-        "elapsed time": 1065.2266578140006
+        "train time": 39.52404374004254,
+        "eval time": 14.292836533997615,
+        "tokens / sec": 5211.182371790845,
+        "mem allocated avg": 6899276843.008,
+        "mem reserved avg": 11993117360.128,
+        "elapsed time": 1037.8300729750008
       },
       {
         "step": 2750,
-        "valid accuracy": 0.42,
-        "train loss": 0.6097300077676773,
+        "valid accuracy": 0.5,
+        "train loss": 0.6095741709470749,
         "train samples": 11000,
-        "train time": 43.157022238001446,
-        "eval time": 17.135427543998958,
-        "tokens / sec": 4909.537058222485,
-        "mem allocated avg": 6904392247.296,
-        "mem reserved avg": 12124977889.28,
-        "elapsed time": 1173.5244531360004
+        "train time": 40.033341915019264,
+        "eval time": 8.408460123999248,
+        "tokens / sec": 5292.613353383542,
+        "mem allocated avg": 6909805750.272,
+        "mem reserved avg": 12163313827.84,
+        "elapsed time": 1137.8264588340026
       },
       {
         "step": 3000,
-        "valid accuracy": 0.42,
-        "train loss": 0.600518134355545,
+        "valid accuracy": 0.38,
+        "train loss": 0.6007885160446167,
         "train samples": 12000,
-        "train time": 42.90499155000907,
-        "eval time": 17.038416949999373,
-        "tokens / sec": 4864.958422301702,
-        "mem allocated avg": 6898886381.568,
-        "mem reserved avg": 12038994657.28,
-        "elapsed time": 1281.100714346001
+        "train time": 39.80941545598034,
+        "eval time": 9.015956413000822,
+        "tokens / sec": 5243.257094061238,
+        "mem allocated avg": 6905287532.544,
+        "mem reserved avg": 12079830401.024,
+        "elapsed time": 1237.902389021001
       },
       {
         "step": 3250,
-        "valid accuracy": 0.54,
-        "train loss": 0.6095727566480637,
+        "valid accuracy": 0.56,
+        "train loss": 0.609751238822937,
         "train samples": 13000,
-        "train time": 42.991201876006016,
-        "eval time": 17.145920277998812,
-        "tokens / sec": 4905.678157318666,
-        "mem allocated avg": 6900920473.6,
-        "mem reserved avg": 12070426771.456,
-        "elapsed time": 1389.080374264
+        "train time": 40.0327758529711,
+        "eval time": 9.789832267997554,
+        "tokens / sec": 5268.208249524811,
+        "mem allocated avg": 6907088541.696,
+        "mem reserved avg": 12110599815.168,
+        "elapsed time": 1339.3388089530017
       },
       {
         "step": 3500,
-        "valid accuracy": 0.54,
-        "train loss": 0.59402192902565,
+        "valid accuracy": 0.52,
+        "train loss": 0.5943620399236679,
         "train samples": 14000,
-        "train time": 43.139979139998104,
-        "eval time": 10.18719298600081,
-        "tokens / sec": 4862.079309758545,
-        "mem allocated avg": 6899826102.272,
-        "mem reserved avg": 12054404530.176,
-        "elapsed time": 1490.7450829120007
+        "train time": 39.922039763983776,
+        "eval time": 8.802732422998815,
+        "tokens / sec": 5253.990057622979,
+        "mem allocated avg": 6905655146.496,
+        "mem reserved avg": 12095215108.096,
+        "elapsed time": 1439.3830861440001
       },
       {
         "step": 3750,
-        "valid accuracy": 0.58,
-        "train loss": 0.5927710949182511,
+        "valid accuracy": 0.48,
+        "train loss": 0.5927145059108734,
         "train samples": 15000,
-        "train time": 43.49427866901169,
-        "eval time": 10.884315328999946,
-        "tokens / sec": 4982.333461582249,
-        "mem allocated avg": 6910839183.36,
-        "mem reserved avg": 12223619530.752,
-        "elapsed time": 1593.6702795590008
+        "train time": 40.492691420033225,
+        "eval time": 9.00371527400057,
+        "tokens / sec": 5351.65711145565,
+        "mem allocated avg": 6916861732.864,
+        "mem reserved avg": 12265587736.576,
+        "elapsed time": 1540.9954331820009
       },
       {
         "step": 4000,
-        "valid accuracy": 0.52,
-        "train loss": 0.6036465883255004,
+        "valid accuracy": 0.5,
+        "train loss": 0.6037785897254944,
         "train samples": 16000,
-        "train time": 42.54699739801072,
-        "eval time": 10.508950370000093,
-        "tokens / sec": 4803.464697829781,
-        "mem allocated avg": 6892073494.528,
-        "mem reserved avg": 11931788247.04,
-        "elapsed time": 1694.1543825910012
+        "train time": 39.58210096696348,
+        "eval time": 9.008053338999161,
+        "tokens / sec": 5163.268118854439,
+        "mem allocated avg": 6898274762.752,
+        "mem reserved avg": 11974511427.584,
+        "elapsed time": 1640.5221296710006
       },
       {
         "step": 4250,
         "valid accuracy": 0.5,
-        "train loss": 0.5904108211994171,
+        "train loss": 0.5905539064407349,
         "train samples": 17000,
-        "train time": 42.904117188016244,
-        "eval time": 10.362485865000053,
-        "tokens / sec": 4927.009663749569,
-        "mem allocated avg": 6902539771.904,
-        "mem reserved avg": 12087044603.904,
-        "elapsed time": 1795.3652429800004
+        "train time": 40.03998009499628,
+        "eval time": 10.12545333899834,
+        "tokens / sec": 5279.448179006884,
+        "mem allocated avg": 6908281157.632,
+        "mem reserved avg": 12122973011.968,
+        "elapsed time": 1742.3377487470025
       },
       {
         "step": 4500,
         "valid accuracy": 0.56,
-        "train loss": 0.5975252593755722,
+        "train loss": 0.5975803916454315,
         "train samples": 18000,
-        "train time": 42.7045542899923,
-        "eval time": 9.970661539999128,
-        "tokens / sec": 4866.413043179837,
-        "mem allocated avg": 6897064284.16,
-        "mem reserved avg": 12006883065.856,
-        "elapsed time": 1895.7771126360003
+        "train time": 39.89842279496588,
+        "eval time": 8.936802754000382,
+        "tokens / sec": 5208.677071471134,
+        "mem allocated avg": 6903550846.976,
+        "mem reserved avg": 12046091419.648,
+        "elapsed time": 1842.5112857700005
       },
       {
         "step": 4750,
-        "valid accuracy": 0.54,
-        "train loss": 0.588557964682579,
+        "valid accuracy": 0.56,
+        "train loss": 0.5887055099010468,
         "train samples": 19000,
-        "train time": 42.698231221012975,
-        "eval time": 10.72399718899942,
-        "tokens / sec": 4916.8078863342525,
-        "mem allocated avg": 6900484192.256,
-        "mem reserved avg": 12052575813.632,
-        "elapsed time": 1997.1282366079995
+        "train time": 39.961028160010756,
+        "eval time": 9.079531961000612,
+        "tokens / sec": 5253.59355518503,
+        "mem allocated avg": 6905698629.632,
+        "mem reserved avg": 12090920140.8,
+        "elapsed time": 1943.151558054
       },
       {
         "step": 5000,
         "valid accuracy": 0.56,
-        "train loss": 0.5946548076868057,
+        "train loss": 0.5947723392248153,
         "train samples": 20000,
-        "train time": 42.98944765599845,
-        "eval time": 10.321189939999385,
-        "tokens / sec": 4844.909887343902,
-        "mem allocated avg": 6896923324.416,
-        "mem reserved avg": 12004861411.328,
-        "elapsed time": 2098.129397994
+        "train time": 39.70571685399773,
+        "eval time": 8.965388607000932,
+        "tokens / sec": 5245.592234636347,
+        "mem allocated avg": 6902749710.336,
+        "mem reserved avg": 12042400432.128,
+        "elapsed time": 2043.0771329560012
       },
       {
         "step": 5000,
-        "test accuracy": 0.5056861258529188,
-        "train loss": 0.5946548076868057,
+        "test accuracy": 0.4935557240333586,
+        "train loss": 0.5947723392248153,
         "train samples": 20000,
         "train total tokens": 4198051
       }
@@ -334,25 +335,25 @@
       }
     },
     "package_info": {
-      "transformers-version": "4.52.4",
+      "transformers-version": "4.57.1",
       "transformers-commit-hash": null,
-      "peft-version": "0.16.1.dev0",
-      "peft-commit-hash": "25e5c6b25c4589eb2683484ede1ba3d985d8a760",
-      "datasets-version": "3.6.0",
+      "peft-version": "0.17.2.dev0",
+      "peft-commit-hash": "a18ba67f242ab2eb74cdabab76ea2fd836b5cd83",
+      "datasets-version": "4.2.0",
       "datasets-commit-hash": null,
       "bitsandbytes-version": "0.46.0",
       "bitsandbytes-commit-hash": null,
-      "torch-version": "2.7.1+cu126",
+      "torch-version": "2.9.0+cu128",
       "torch-commit-hash": null
     },
     "system_info": {
       "system": "Linux",
-      "release": "6.8.0-1031-aws",
-      "version": "#33-Ubuntu SMP Fri Jun 20 18:11:07 UTC 2025",
+      "release": "6.14.0-1014-aws",
+      "version": "#14~24.04.1-Ubuntu SMP Tue Sep 23 14:51:14 UTC 2025",
       "machine": "x86_64",
       "processor": "x86_64",
       "accelerator": "NVIDIA L40S"
     },
-    "pytorch_info": "PyTorch built with:\n  - GCC 11.2\n  - C++ Version: 201703\n  - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 12.6\n  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n  - CuDNN 90.7.1  (built against CUDA 12.8)\n    - Built with CuDNN 90.5.1\n  - Magma 2.6.1\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n"
+    "pytorch_info": "PyTorch built with:\n  - GCC 13.3\n  - C++ Version: 201703\n  - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 12.8\n  - NVCC architecture flags: -gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_100,code=sm_100;-gencode;arch=compute_120,code=sm_120\n  - CuDNN 90.7.1\n    - Built with CuDNN 90.8\n  - Magma 2.6.1\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=0fabc3ba44823f257e70ce397d989c8de5e362c1, CUDA_VERSION=12.8, CUDNN_VERSION=9.8.0, CXX_COMPILER=/opt/rh/gcc-toolset-13/root/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -DC10_NODEPRECATED -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-dangling-reference -Wno-error=dangling-reference -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, USE_XCCL=OFF, USE_XPU=OFF, \n"
   }
 }
\ No newline at end of file
diff --git a/method_comparison/MetaMathQA/results/osf--llama-3.2-3B-rank128.json b/method_comparison/MetaMathQA/results/osf--llama-3.2-3B-rank128.json
new file mode 100644
index 0000000000..69f9ffc565
--- /dev/null
+++ b/method_comparison/MetaMathQA/results/osf--llama-3.2-3B-rank128.json
@@ -0,0 +1,361 @@
+{
+  "run_info": {
+    "created_at": "2025-10-23T18:14:24+00:00",
+    "total_time": 4667.161105344003,
+    "experiment_name": "osf/llama-3.2-3B-rank128",
+    "peft_branch": "main",
+    "train_config": {
+      "model_id": "meta-llama/Llama-3.2-3B",
+      "dtype": "bfloat16",
+      "max_seq_length": 768,
+      "batch_size": 4,
+      "batch_size_eval": 50,
+      "max_steps": 5000,
+      "eval_steps": 250,
+      "compile": false,
+      "query_template": "Question: {query} Think step by step.\nAnswer:",
+      "seed": 0,
+      "grad_norm_clip": 1.0,
+      "optimizer_type": "AdamW",
+      "optimizer_kwargs": {
+        "lr": 5e-05
+      },
+      "lr_scheduler": "cosine",
+      "use_amp": false,
+      "autocast_adapter_dtype": true,
+      "generation_kwargs": {
+        "max_length": 800,
+        "max_new_tokens": 300
+      },
+      "attn_implementation": null
+    },
+    "peft_config": {
+      "task_type": null,
+      "peft_type": "OSF",
+      "auto_mapping": null,
+      "peft_version": "0.17.2.dev0@UNKNOWN",
+      "base_model_name_or_path": "meta-llama/Llama-3.2-3B",
+      "revision": null,
+      "inference_mode": false,
+      "effective_rank": null,
+      "target_modules": [
+        "q_proj",
+        "k_proj",
+        "v_proj",
+        "o_proj",
+        "gate_proj",
+        "down_proj",
+        "up_proj"
+      ],
+      "rank_pattern": {
+        "q_proj": 2944,
+        "o_proj": 2944,
+        "k_proj": 896,
+        "v_proj": 896,
+        "gate_proj": 2944,
+        "down_proj": 2944,
+        "up_proj": 2944
+      },
+      "init_weights": null,
+      "modules_to_save": null,
+      "target_svd_config": null
+    },
+    "error_msg": ""
+  },
+  "train_info": {
+    "accelerator_memory_reserved_avg": 27568475262,
+    "accelerator_memory_max": 38503710720,
+    "accelerator_memory_reserved_99th": 33747495813,
+    "train_time": 3807.1486767399983,
+    "file_size": 389155304,
+    "num_trainable_params": 194535936,
+    "num_total_params": 3407285760,
+    "status": "success",
+    "metrics": [
+      {
+        "step": 250,
+        "valid accuracy": 0.3,
+        "train loss": 1.1347286381721498,
+        "train samples": 1000,
+        "train time": 71.47102643898688,
+        "eval time": 40.76489851000224,
+        "tokens / sec": 2962.3052941703513,
+        "mem allocated avg": 16401750667.264,
+        "mem reserved avg": 27622234914.816,
+        "elapsed time": 192.81275960900166
+      },
+      {
+        "step": 500,
+        "valid accuracy": 0.4,
+        "train loss": 0.7763967347145081,
+        "train samples": 2000,
+        "train time": 70.88684602297872,
+        "eval time": 40.57877984700099,
+        "tokens / sec": 2934.183302958298,
+        "mem allocated avg": 16395229587.456,
+        "mem reserved avg": 27508837711.872,
+        "elapsed time": 373.68713150500116
+      },
+      {
+        "step": 750,
+        "valid accuracy": 0.32,
+        "train loss": 0.6953229109048843,
+        "train samples": 3000,
+        "train time": 71.95635187800144,
+        "eval time": 32.978734361000534,
+        "tokens / sec": 2979.597970218205,
+        "mem allocated avg": 16406115661.824,
+        "mem reserved avg": 27673027936.256,
+        "elapsed time": 548.7281182350016
+      },
+      {
+        "step": 1000,
+        "valid accuracy": 0.4,
+        "train loss": 0.6674379595518112,
+        "train samples": 4000,
+        "train time": 71.01955180798177,
+        "eval time": 40.51733253599741,
+        "tokens / sec": 2933.5020384708405,
+        "mem allocated avg": 16397466488.832,
+        "mem reserved avg": 27537417699.328,
+        "elapsed time": 729.8881993149989
+      },
+      {
+        "step": 1250,
+        "valid accuracy": 0.42,
+        "train loss": 0.6616734237670898,
+        "train samples": 5000,
+        "train time": 71.39272207195972,
+        "eval time": 40.68997940099507,
+        "tokens / sec": 2920.9980225968384,
+        "mem allocated avg": 16395791054.848,
+        "mem reserved avg": 27535354101.76,
+        "elapsed time": 911.6590812729992
+      },
+      {
+        "step": 1500,
+        "valid accuracy": 0.42,
+        "train loss": 0.6527736356258392,
+        "train samples": 6000,
+        "train time": 71.3452017439995,
+        "eval time": 40.91994198199973,
+        "tokens / sec": 2934.058561515047,
+        "mem allocated avg": 16398943606.784,
+        "mem reserved avg": 27551267291.136,
+        "elapsed time": 1093.6544228519997
+      },
+      {
+        "step": 1750,
+        "valid accuracy": 0.4,
+        "train loss": 0.6452968027591706,
+        "train samples": 7000,
+        "train time": 71.2096087580212,
+        "eval time": 27.92585728400445,
+        "tokens / sec": 2939.98244971986,
+        "mem allocated avg": 16398630768.64,
+        "mem reserved avg": 27576273731.584,
+        "elapsed time": 1262.488236760997
+      },
+      {
+        "step": 2000,
+        "valid accuracy": 0.44,
+        "train loss": 0.647414596915245,
+        "train samples": 8000,
+        "train time": 71.19596286901651,
+        "eval time": 27.970824908996292,
+        "tokens / sec": 2917.2440631515974,
+        "mem allocated avg": 16394856886.272,
+        "mem reserved avg": 27520036503.552,
+        "elapsed time": 1431.4772036520008
+      },
+      {
+        "step": 2250,
+        "valid accuracy": 0.46,
+        "train loss": 0.6401616543531418,
+        "train samples": 9000,
+        "train time": 72.0709888140409,
+        "eval time": 40.758223525001085,
+        "tokens / sec": 2982.448326810298,
+        "mem allocated avg": 16407562051.584,
+        "mem reserved avg": 27707186348.032,
+        "elapsed time": 1614.6744573789983
+      },
+      {
+        "step": 2500,
+        "valid accuracy": 0.4,
+        "train loss": 0.6391781423091888,
+        "train samples": 10000,
+        "train time": 70.6605427990362,
+        "eval time": 40.550873344996944,
+        "tokens / sec": 2914.8799576276306,
+        "mem allocated avg": 16391433314.304,
+        "mem reserved avg": 27455117066.24,
+        "elapsed time": 1795.4127528829995
+      },
+      {
+        "step": 2750,
+        "valid accuracy": 0.44,
+        "train loss": 0.633193033695221,
+        "train samples": 11000,
+        "train time": 71.95110527896759,
+        "eval time": 41.14912619200186,
+        "tokens / sec": 2944.7914549540083,
+        "mem allocated avg": 16402253942.784,
+        "mem reserved avg": 27629960822.784,
+        "elapsed time": 1978.6382485249997
+      },
+      {
+        "step": 3000,
+        "valid accuracy": 0.46,
+        "train loss": 0.6262783712148666,
+        "train samples": 12000,
+        "train time": 71.53329248691443,
+        "eval time": 27.86414769000112,
+        "tokens / sec": 2917.9559998329883,
+        "mem allocated avg": 16397906978.816,
+        "mem reserved avg": 27544262803.456,
+        "elapsed time": 2147.756047652998
+      },
+      {
+        "step": 3250,
+        "valid accuracy": 0.5,
+        "train loss": 0.6382041232585907,
+        "train samples": 13000,
+        "train time": 71.6970354819714,
+        "eval time": 27.99712078000448,
+        "tokens / sec": 2941.558163210698,
+        "mem allocated avg": 16398972514.304,
+        "mem reserved avg": 27574889611.264,
+        "elapsed time": 2317.192205391999
+      },
+      {
+        "step": 3500,
+        "valid accuracy": 0.44,
+        "train loss": 0.6242904909849167,
+        "train samples": 14000,
+        "train time": 71.18509741093294,
+        "eval time": 40.980086228999426,
+        "tokens / sec": 2946.5436956441617,
+        "mem allocated avg": 16399184805.888,
+        "mem reserved avg": 27556719886.336,
+        "elapsed time": 2498.944009259998
+      },
+      {
+        "step": 3750,
+        "valid accuracy": 0.46,
+        "train loss": 0.6247457062005997,
+        "train samples": 15000,
+        "train time": 72.02823552303016,
+        "eval time": 41.01247237699863,
+        "tokens / sec": 3008.584042444186,
+        "mem allocated avg": 16410260455.424,
+        "mem reserved avg": 27745530675.2,
+        "elapsed time": 2682.3083391710024
+      },
+      {
+        "step": 4000,
+        "valid accuracy": 0.48,
+        "train loss": 0.6386832315921783,
+        "train samples": 16000,
+        "train time": 71.0258735000898,
+        "eval time": 41.09184825400007,
+        "tokens / sec": 2877.4443724334005,
+        "mem allocated avg": 16391636211.712,
+        "mem reserved avg": 27441292640.256,
+        "elapsed time": 2863.960687703002
+      },
+      {
+        "step": 4250,
+        "valid accuracy": 0.48,
+        "train loss": 0.6240871007442474,
+        "train samples": 17000,
+        "train time": 71.68756263409159,
+        "eval time": 40.60824938499718,
+        "tokens / sec": 2948.7541804004964,
+        "mem allocated avg": 16400376035.328,
+        "mem reserved avg": 27598243495.936,
+        "elapsed time": 3046.081177453998
+      },
+      {
+        "step": 4500,
+        "valid accuracy": 0.44,
+        "train loss": 0.633060937166214,
+        "train samples": 18000,
+        "train time": 71.06215882203833,
+        "eval time": 40.82050051999977,
+        "tokens / sec": 2924.453794324497,
+        "mem allocated avg": 16395753859.072,
+        "mem reserved avg": 27509492023.296,
+        "elapsed time": 3227.7807077830003
+      },
+      {
+        "step": 4750,
+        "valid accuracy": 0.46,
+        "train loss": 0.6253616527318955,
+        "train samples": 19000,
+        "train time": 70.68259103103628,
+        "eval time": 40.62497806800093,
+        "tokens / sec": 2970.1655943514734,
+        "mem allocated avg": 16399317602.304,
+        "mem reserved avg": 27571760660.48,
+        "elapsed time": 3408.8530542459994
+      },
+      {
+        "step": 5000,
+        "valid accuracy": 0.48,
+        "train loss": 0.6308260992765427,
+        "train samples": 20000,
+        "train time": 71.07603502904385,
+        "eval time": 28.169818383001257,
+        "tokens / sec": 2930.382933078504,
+        "mem allocated avg": 16396032215.04,
+        "mem reserved avg": 27510599319.552,
+        "elapsed time": 3577.845173487003
+      },
+      {
+        "step": 5000,
+        "test accuracy": 0.4359363153904473,
+        "train loss": 0.6308260992765427,
+        "train samples": 20000,
+        "train total tokens": 4198051
+      }
+    ]
+  },
+  "meta_info": {
+    "model_info": {
+      "sha": "13afe5124825b4f3751f836b40dafda64c1ed062",
+      "created_at": "2024-09-18T15:23:48+00:00"
+    },
+    "dataset_info": {
+      "metamath": {
+        "sha": "aa4f34d3d2d3231299b5b03d9b3e5a20da45aa18",
+        "created_at": "2023-09-21T17:22:46+00:00"
+      },
+      "gsm8k": {
+        "sha": "e53f048856ff4f594e959d75785d2c2d37b678ee",
+        "created_at": "2022-04-12T10:22:10+00:00"
+      }
+    },
+    "package_info": {
+      "transformers-version": "4.57.1",
+      "transformers-commit-hash": null,
+      "peft-version": "0.17.2.dev0",
+      "peft-commit-hash": "a18ba67f242ab2eb74cdabab76ea2fd836b5cd83",
+      "datasets-version": "4.2.0",
+      "datasets-commit-hash": null,
+      "bitsandbytes-version": "0.46.0",
+      "bitsandbytes-commit-hash": null,
+      "torch-version": "2.9.0+cu128",
+      "torch-commit-hash": null
+    },
+    "system_info": {
+      "system": "Linux",
+      "release": "6.14.0-1014-aws",
+      "version": "#14~24.04.1-Ubuntu SMP Tue Sep 23 14:51:14 UTC 2025",
+      "machine": "x86_64",
+      "processor": "x86_64",
+      "accelerator": "NVIDIA L40S"
+    },
+    "pytorch_info": "PyTorch built with:\n  - GCC 13.3\n  - C++ Version: 201703\n  - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 12.8\n  - NVCC architecture flags: -gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_100,code=sm_100;-gencode;arch=compute_120,code=sm_120\n  - CuDNN 90.7.1\n    - Built with CuDNN 90.8\n  - Magma 2.6.1\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=0fabc3ba44823f257e70ce397d989c8de5e362c1, CUDA_VERSION=12.8, CUDNN_VERSION=9.8.0, CXX_COMPILER=/opt/rh/gcc-toolset-13/root/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -DC10_NODEPRECATED -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-dangling-reference -Wno-error=dangling-reference -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, USE_XCCL=OFF, USE_XPU=OFF, \n"
+  }
+}
\ No newline at end of file
diff --git a/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-sample_vocab-lr_0.001.json b/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-sample_vocab-lr_0.001.json
new file mode 100644
index 0000000000..b01361552e
--- /dev/null
+++ b/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-sample_vocab-lr_0.001.json
@@ -0,0 +1,349 @@
+{
+  "run_info": {
+    "created_at": "2025-10-23T19:32:16+00:00",
+    "total_time": 2686.3397733460006,
+    "experiment_name": "prompt_tuning/llama-3.2-3B-sample_vocab-lr_0.001",
+    "peft_branch": "main",
+    "train_config": {
+      "model_id": "meta-llama/Llama-3.2-3B",
+      "dtype": "bfloat16",
+      "max_seq_length": 768,
+      "batch_size": 4,
+      "batch_size_eval": 50,
+      "max_steps": 5000,
+      "eval_steps": 250,
+      "compile": false,
+      "query_template": "Question: {query} Think step by step.\nAnswer:",
+      "seed": 0,
+      "grad_norm_clip": 1.0,
+      "optimizer_type": "AdamW",
+      "optimizer_kwargs": {
+        "lr": 0.001
+      },
+      "lr_scheduler": "cosine",
+      "use_amp": false,
+      "autocast_adapter_dtype": true,
+      "generation_kwargs": {
+        "max_length": 800,
+        "max_new_tokens": 300
+      },
+      "attn_implementation": null
+    },
+    "peft_config": {
+      "task_type": "CAUSAL_LM",
+      "peft_type": "PROMPT_TUNING",
+      "auto_mapping": null,
+      "peft_version": "0.17.2.dev0@UNKNOWN",
+      "base_model_name_or_path": "meta-llama/Llama-3.2-3B",
+      "revision": null,
+      "inference_mode": false,
+      "num_virtual_tokens": 200,
+      "token_dim": 3072,
+      "num_transformer_submodules": 1,
+      "num_attention_heads": 24,
+      "num_layers": 28,
+      "modules_to_save": null,
+      "prompt_tuning_init": "SAMPLE_VOCAB",
+      "prompt_tuning_init_text": null,
+      "tokenizer_name_or_path": null,
+      "tokenizer_kwargs": null
+    },
+    "error_msg": ""
+  },
+  "train_info": {
+    "accelerator_memory_reserved_avg": 15333902725,
+    "accelerator_memory_max": 24423432192,
+    "accelerator_memory_reserved_99th": 20718058209,
+    "train_time": 2404.491197405987,
+    "file_size": 2457728,
+    "num_trainable_params": 614400,
+    "num_total_params": 3213364224,
+    "status": "success",
+    "metrics": [
+      {
+        "step": 250,
+        "valid accuracy": 0.26,
+        "train loss": 1.2232583401203156,
+        "train samples": 1000,
+        "train time": 46.17508273195563,
+        "eval time": 16.505391887003498,
+        "tokens / sec": 4585.135260699362,
+        "mem allocated avg": 7082755428.352,
+        "mem reserved avg": 15369582411.776,
+        "elapsed time": 130.58785935799824
+      },
+      {
+        "step": 500,
+        "valid accuracy": 0.26,
+        "train loss": 0.7836624360084534,
+        "train samples": 2000,
+        "train time": 45.018037280060526,
+        "eval time": 13.687452188001771,
+        "tokens / sec": 4620.259179804925,
+        "mem allocated avg": 7075105931.264,
+        "mem reserved avg": 15270336790.528,
+        "elapsed time": 244.8983392040027
+      },
+      {
+        "step": 750,
+        "valid accuracy": 0.34,
+        "train loss": 0.7472633671760559,
+        "train samples": 3000,
+        "train time": 45.978240520918916,
+        "eval time": 16.433850564004388,
+        "tokens / sec": 4663.097099212682,
+        "mem allocated avg": 7084822022.144,
+        "mem reserved avg": 15411735166.976,
+        "elapsed time": 363.6571200059989
+      },
+      {
+        "step": 1000,
+        "valid accuracy": 0.36,
+        "train loss": 0.7188941253423691,
+        "train samples": 4000,
+        "train time": 45.29402179891622,
+        "eval time": 16.422749457000464,
+        "tokens / sec": 4599.635707443073,
+        "mem allocated avg": 7077147275.264,
+        "mem reserved avg": 15320416780.288,
+        "elapsed time": 481.36312461700436
+      },
+      {
+        "step": 1250,
+        "valid accuracy": 0.38,
+        "train loss": 0.7124129735231399,
+        "train samples": 5000,
+        "train time": 45.307238408975536,
+        "eval time": 10.875751104998926,
+        "tokens / sec": 4602.752392842549,
+        "mem allocated avg": 7076391862.272,
+        "mem reserved avg": 15302725206.016,
+        "elapsed time": 593.3289284760031
+      },
+      {
+        "step": 1500,
+        "valid accuracy": 0.44,
+        "train loss": 0.7025347559452056,
+        "train samples": 6000,
+        "train time": 45.58526100008021,
+        "eval time": 16.42347687900474,
+        "tokens / sec": 4592.07637309857,
+        "mem allocated avg": 7078312007.68,
+        "mem reserved avg": 15317346549.76,
+        "elapsed time": 711.5082658690008
+      },
+      {
+        "step": 1750,
+        "valid accuracy": 0.38,
+        "train loss": 0.6954681335687637,
+        "train samples": 7000,
+        "train time": 45.64173767795728,
+        "eval time": 16.42899393199332,
+        "tokens / sec": 4586.920013369872,
+        "mem allocated avg": 7079384836.096,
+        "mem reserved avg": 15339685412.864,
+        "elapsed time": 829.8873879540042
+      },
+      {
+        "step": 2000,
+        "valid accuracy": 0.38,
+        "train loss": 0.6959483157396317,
+        "train samples": 8000,
+        "train time": 45.487343653003336,
+        "eval time": 13.574101327998505,
+        "tokens / sec": 4566.017342854592,
+        "mem allocated avg": 7076109684.736,
+        "mem reserved avg": 15293246078.976,
+        "elapsed time": 945.0783910430036
+      },
+      {
+        "step": 2250,
+        "valid accuracy": 0.36,
+        "train loss": 0.6886743805408477,
+        "train samples": 9000,
+        "train time": 46.25566355796764,
+        "eval time": 13.330924835005135,
+        "tokens / sec": 4646.955279987001,
+        "mem allocated avg": 7087138603.008,
+        "mem reserved avg": 15455901188.096,
+        "elapsed time": 1060.9912824740022
+      },
+      {
+        "step": 2500,
+        "valid accuracy": 0.34,
+        "train loss": 0.685915477514267,
+        "train samples": 10000,
+        "train time": 45.102773971921124,
+        "eval time": 9.97469689600257,
+        "tokens / sec": 4566.614907726638,
+        "mem allocated avg": 7072584992.768,
+        "mem reserved avg": 15242411114.496,
+        "elapsed time": 1172.0626167860028
+      },
+      {
+        "step": 2750,
+        "valid accuracy": 0.34,
+        "train loss": 0.6786098405122757,
+        "train samples": 11000,
+        "train time": 45.94233982402511,
+        "eval time": 16.441506881994428,
+        "tokens / sec": 4611.889616671175,
+        "mem allocated avg": 7083189243.904,
+        "mem reserved avg": 15378675662.848,
+        "elapsed time": 1290.5899199240012
+      },
+      {
+        "step": 3000,
+        "valid accuracy": 0.34,
+        "train loss": 0.6700806043148041,
+        "train samples": 12000,
+        "train time": 45.345923172040784,
+        "eval time": 16.428019475999463,
+        "tokens / sec": 4603.081939871026,
+        "mem allocated avg": 7077922359.296,
+        "mem reserved avg": 15325441556.48,
+        "elapsed time": 1408.37935114
+      },
+      {
+        "step": 3250,
+        "valid accuracy": 0.38,
+        "train loss": 0.6773221861124039,
+        "train samples": 13000,
+        "train time": 45.62251189197559,
+        "eval time": 10.20512724499713,
+        "tokens / sec": 4622.739767143219,
+        "mem allocated avg": 7079430316.032,
+        "mem reserved avg": 15336581627.904,
+        "elapsed time": 1520.2662671619983
+      },
+      {
+        "step": 3500,
+        "valid accuracy": 0.36,
+        "train loss": 0.6638141021728515,
+        "train samples": 14000,
+        "train time": 45.362639506965934,
+        "eval time": 16.423270223000145,
+        "tokens / sec": 4623.849103132338,
+        "mem allocated avg": 7078731702.272,
+        "mem reserved avg": 15340423610.368,
+        "elapsed time": 1637.9249663660012
+      },
+      {
+        "step": 3750,
+        "valid accuracy": 0.4,
+        "train loss": 0.659807546377182,
+        "train samples": 15000,
+        "train time": 46.37028079503216,
+        "eval time": 16.430752983003913,
+        "tokens / sec": 4673.316535603474,
+        "mem allocated avg": 7089302118.4,
+        "mem reserved avg": 15476906262.528,
+        "elapsed time": 1757.2157563939982
+      },
+      {
+        "step": 4000,
+        "valid accuracy": 0.44,
+        "train loss": 0.6735307123661042,
+        "train samples": 16000,
+        "train time": 45.140112428038265,
+        "eval time": 10.74987911900098,
+        "tokens / sec": 4527.525276455804,
+        "mem allocated avg": 7071258566.656,
+        "mem reserved avg": 15226523090.944,
+        "elapsed time": 1869.2519954439995
+      },
+      {
+        "step": 4250,
+        "valid accuracy": 0.38,
+        "train loss": 0.6553376598358154,
+        "train samples": 17000,
+        "train time": 45.46981849010626,
+        "eval time": 10.198734415003855,
+        "tokens / sec": 4648.995905844576,
+        "mem allocated avg": 7081538940.928,
+        "mem reserved avg": 15362930245.632,
+        "elapsed time": 1981.2125737099996
+      },
+      {
+        "step": 4500,
+        "valid accuracy": 0.44,
+        "train loss": 0.6633048733472824,
+        "train samples": 18000,
+        "train time": 45.337251453973295,
+        "eval time": 11.732473295996897,
+        "tokens / sec": 4583.824412271184,
+        "mem allocated avg": 7076373809.152,
+        "mem reserved avg": 15290343620.608,
+        "elapsed time": 2094.4324725890037
+      },
+      {
+        "step": 4750,
+        "valid accuracy": 0.38,
+        "train loss": 0.6535381546020508,
+        "train samples": 19000,
+        "train time": 45.22358582002926,
+        "eval time": 16.417741852004838,
+        "tokens / sec": 4642.245770502773,
+        "mem allocated avg": 7078582056.96,
+        "mem reserved avg": 15336782954.496,
+        "elapsed time": 2212.095038350999
+      },
+      {
+        "step": 5000,
+        "valid accuracy": 0.38,
+        "train loss": 0.6601177526712417,
+        "train samples": 20000,
+        "train time": 45.29508365698712,
+        "eval time": 10.317947228002595,
+        "tokens / sec": 4598.291540363922,
+        "mem allocated avg": 7075333005.312,
+        "mem reserved avg": 15280059187.2,
+        "elapsed time": 2323.7172720720046
+      },
+      {
+        "step": 5000,
+        "test accuracy": 0.3912054586808188,
+        "train loss": 0.6601177526712417,
+        "train samples": 20000,
+        "train total tokens": 4198051
+      }
+    ]
+  },
+  "meta_info": {
+    "model_info": {
+      "sha": "13afe5124825b4f3751f836b40dafda64c1ed062",
+      "created_at": "2024-09-18T15:23:48+00:00"
+    },
+    "dataset_info": {
+      "metamath": {
+        "sha": "aa4f34d3d2d3231299b5b03d9b3e5a20da45aa18",
+        "created_at": "2023-09-21T17:22:46+00:00"
+      },
+      "gsm8k": {
+        "sha": "e53f048856ff4f594e959d75785d2c2d37b678ee",
+        "created_at": "2022-04-12T10:22:10+00:00"
+      }
+    },
+    "package_info": {
+      "transformers-version": "4.57.1",
+      "transformers-commit-hash": null,
+      "peft-version": "0.17.2.dev0",
+      "peft-commit-hash": "a18ba67f242ab2eb74cdabab76ea2fd836b5cd83",
+      "datasets-version": "4.2.0",
+      "datasets-commit-hash": null,
+      "bitsandbytes-version": "0.46.0",
+      "bitsandbytes-commit-hash": null,
+      "torch-version": "2.9.0+cu128",
+      "torch-commit-hash": null
+    },
+    "system_info": {
+      "system": "Linux",
+      "release": "6.14.0-1014-aws",
+      "version": "#14~24.04.1-Ubuntu SMP Tue Sep 23 14:51:14 UTC 2025",
+      "machine": "x86_64",
+      "processor": "x86_64",
+      "accelerator": "NVIDIA L40S"
+    },
+    "pytorch_info": "PyTorch built with:\n  - GCC 13.3\n  - C++ Version: 201703\n  - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 12.8\n  - NVCC architecture flags: -gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_100,code=sm_100;-gencode;arch=compute_120,code=sm_120\n  - CuDNN 90.7.1\n    - Built with CuDNN 90.8\n  - Magma 2.6.1\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=0fabc3ba44823f257e70ce397d989c8de5e362c1, CUDA_VERSION=12.8, CUDNN_VERSION=9.8.0, CXX_COMPILER=/opt/rh/gcc-toolset-13/root/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -DC10_NODEPRECATED -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-dangling-reference -Wno-error=dangling-reference -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, USE_XCCL=OFF, USE_XPU=OFF, \n"
+  }
+}
\ No newline at end of file
diff --git a/method_comparison/MetaMathQA/results/road--llama-3.2-3B-lr_0.001.json b/method_comparison/MetaMathQA/results/road--llama-3.2-3B-lr_0.001.json
new file mode 100644
index 0000000000..36f2a4fa36
--- /dev/null
+++ b/method_comparison/MetaMathQA/results/road--llama-3.2-3B-lr_0.001.json
@@ -0,0 +1,347 @@
+{
+  "run_info": {
+    "created_at": "2025-10-23T20:17:06+00:00",
+    "total_time": 2179.066774046005,
+    "experiment_name": "road/llama-3.2-3B-lr_0.001",
+    "peft_branch": "main",
+    "train_config": {
+      "model_id": "meta-llama/Llama-3.2-3B",
+      "dtype": "bfloat16",
+      "max_seq_length": 768,
+      "batch_size": 4,
+      "batch_size_eval": 50,
+      "max_steps": 5000,
+      "eval_steps": 250,
+      "compile": false,
+      "query_template": "Question: {query} Think step by step.\nAnswer:",
+      "seed": 0,
+      "grad_norm_clip": 1.0,
+      "optimizer_type": "AdamW",
+      "optimizer_kwargs": {
+        "lr": 0.001
+      },
+      "lr_scheduler": "cosine",
+      "use_amp": false,
+      "autocast_adapter_dtype": true,
+      "generation_kwargs": {
+        "max_length": 800,
+        "max_new_tokens": 300
+      },
+      "attn_implementation": null
+    },
+    "peft_config": {
+      "task_type": null,
+      "peft_type": "ROAD",
+      "auto_mapping": null,
+      "peft_version": "0.17.2.dev0@UNKNOWN",
+      "base_model_name_or_path": "meta-llama/Llama-3.2-3B",
+      "revision": null,
+      "inference_mode": false,
+      "variant": "road_2",
+      "group_size": 64,
+      "init_weights": true,
+      "target_modules": [
+        "v_proj",
+        "q_proj"
+      ],
+      "modules_to_save": null
+    },
+    "error_msg": ""
+  },
+  "train_info": {
+    "accelerator_memory_reserved_avg": 11905060883,
+    "accelerator_memory_max": 22817013760,
+    "accelerator_memory_reserved_99th": 18119540080,
+    "train_time": 1957.655842856002,
+    "file_size": 931480,
+    "num_trainable_params": 229376,
+    "num_total_params": 3212979200,
+    "status": "success",
+    "metrics": [
+      {
+        "step": 250,
+        "valid accuracy": 0.2,
+        "train loss": 1.1802424001693725,
+        "train samples": 1000,
+        "train time": 35.46842726102477,
+        "eval time": 12.287958328997775,
+        "tokens / sec": 5969.224359509503,
+        "mem allocated avg": 6782454061.056,
+        "mem reserved avg": 11957121843.2,
+        "elapsed time": 106.62609866999992
+      },
+      {
+        "step": 500,
+        "valid accuracy": 0.32,
+        "train loss": 0.8729077637195587,
+        "train samples": 2000,
+        "train time": 34.766947779018665,
+        "eval time": 12.224552476000099,
+        "tokens / sec": 5982.549901188677,
+        "mem allocated avg": 6774821242.88,
+        "mem reserved avg": 11843875635.2,
+        "elapsed time": 200.47699543899944
+      },
+      {
+        "step": 750,
+        "valid accuracy": 0.38,
+        "train loss": 0.7739069720506668,
+        "train samples": 3000,
+        "train time": 35.457704256870784,
+        "eval time": 9.125866555994435,
+        "tokens / sec": 6046.668967815496,
+        "mem allocated avg": 6785753763.84,
+        "mem reserved avg": 12008544010.24,
+        "elapsed time": 292.6061148650042
+      },
+      {
+        "step": 1000,
+        "valid accuracy": 0.36,
+        "train loss": 0.7330719463825226,
+        "train samples": 4000,
+        "train time": 34.75414815990371,
+        "eval time": 12.21972047399322,
+        "tokens / sec": 5994.564995276156,
+        "mem allocated avg": 6776829087.744,
+        "mem reserved avg": 11869007904.768,
+        "elapsed time": 386.3837600199986
+      },
+      {
+        "step": 1250,
+        "valid accuracy": 0.32,
+        "train loss": 0.7227181429862977,
+        "train samples": 5000,
+        "train time": 34.886374148009054,
+        "eval time": 12.182244757997978,
+        "tokens / sec": 5977.634680957555,
+        "mem allocated avg": 6777144475.648,
+        "mem reserved avg": 11868110323.712,
+        "elapsed time": 480.67707514700305
+      },
+      {
+        "step": 1500,
+        "valid accuracy": 0.42,
+        "train loss": 0.7143287745714187,
+        "train samples": 6000,
+        "train time": 34.91908030194463,
+        "eval time": 12.299323118000757,
+        "tokens / sec": 5994.7455141979335,
+        "mem allocated avg": 6777638397.952,
+        "mem reserved avg": 11895960502.272,
+        "elapsed time": 575.0448827479995
+      },
+      {
+        "step": 1750,
+        "valid accuracy": 0.44,
+        "train loss": 0.703706993818283,
+        "train samples": 7000,
+        "train time": 34.91686388308881,
+        "eval time": 12.217809029003547,
+        "tokens / sec": 5995.813389798628,
+        "mem allocated avg": 6779787098.112,
+        "mem reserved avg": 11905875836.928,
+        "elapsed time": 669.3053597020044
+      },
+      {
+        "step": 2000,
+        "valid accuracy": 0.32,
+        "train loss": 0.7052462505102157,
+        "train samples": 8000,
+        "train time": 34.839815382081724,
+        "eval time": 7.910366256000998,
+        "tokens / sec": 5961.455240856959,
+        "mem allocated avg": 6776694951.936,
+        "mem reserved avg": 11852641730.56,
+        "elapsed time": 759.2260113459997
+      },
+      {
+        "step": 2250,
+        "valid accuracy": 0.38,
+        "train loss": 0.6979660025835037,
+        "train samples": 9000,
+        "train time": 35.489929292030865,
+        "eval time": 12.21494767999684,
+        "tokens / sec": 6056.591384876774,
+        "mem allocated avg": 6788274759.68,
+        "mem reserved avg": 12044455641.088,
+        "elapsed time": 854.9085956600029
+      },
+      {
+        "step": 2500,
+        "valid accuracy": 0.4,
+        "train loss": 0.6967317589521408,
+        "train samples": 10000,
+        "train time": 34.54721695394983,
+        "eval time": 12.251032476997352,
+        "tokens / sec": 5961.898472879782,
+        "mem allocated avg": 6773104156.672,
+        "mem reserved avg": 11792201809.92,
+        "elapsed time": 948.5542301430032
+      },
+      {
+        "step": 2750,
+        "valid accuracy": 0.36,
+        "train loss": 0.6901429216861725,
+        "train samples": 11000,
+        "train time": 35.248878062957374,
+        "eval time": 12.229859940001916,
+        "tokens / sec": 6010.999828748116,
+        "mem allocated avg": 6784006227.968,
+        "mem reserved avg": 11973513183.232,
+        "elapsed time": 1043.7242833290002
+      },
+      {
+        "step": 3000,
+        "valid accuracy": 0.42,
+        "train loss": 0.6821614302396775,
+        "train samples": 12000,
+        "train time": 34.93150638397492,
+        "eval time": 7.433937801004504,
+        "tokens / sec": 5975.4365501900265,
+        "mem allocated avg": 6778632783.872,
+        "mem reserved avg": 11887857106.944,
+        "elapsed time": 1133.1159700200005
+      },
+      {
+        "step": 3250,
+        "valid accuracy": 0.42,
+        "train loss": 0.691840036034584,
+        "train samples": 13000,
+        "train time": 34.95601586808334,
+        "eval time": 12.231384652004635,
+        "tokens / sec": 6033.324873060365,
+        "mem allocated avg": 6780070547.456,
+        "mem reserved avg": 11917074628.608,
+        "elapsed time": 1227.7147229480033
+      },
+      {
+        "step": 3500,
+        "valid accuracy": 0.42,
+        "train loss": 0.6793323725461959,
+        "train samples": 14000,
+        "train time": 35.1950762630222,
+        "eval time": 7.8389490870031295,
+        "tokens / sec": 5959.640446080646,
+        "mem allocated avg": 6779174166.528,
+        "mem reserved avg": 11899424997.376,
+        "elapsed time": 1318.1422208670047
+      },
+      {
+        "step": 3750,
+        "valid accuracy": 0.44,
+        "train loss": 0.6769173287153244,
+        "train samples": 15000,
+        "train time": 35.7101883490468,
+        "eval time": 12.202735990998917,
+        "tokens / sec": 6068.380202362734,
+        "mem allocated avg": 6789275869.184,
+        "mem reserved avg": 12087027826.688,
+        "elapsed time": 1414.2409790489983
+      },
+      {
+        "step": 4000,
+        "valid accuracy": 0.42,
+        "train loss": 0.693774617433548,
+        "train samples": 16000,
+        "train time": 34.639687986935314,
+        "eval time": 6.860093137001968,
+        "tokens / sec": 5899.966537720583,
+        "mem allocated avg": 6771220369.408,
+        "mem reserved avg": 11770911522.816,
+        "elapsed time": 1502.9286165810045
+      },
+      {
+        "step": 4250,
+        "valid accuracy": 0.42,
+        "train loss": 0.6743522936105728,
+        "train samples": 17000,
+        "train time": 35.23996867898677,
+        "eval time": 8.069594663997123,
+        "tokens / sec": 5998.558112398354,
+        "mem allocated avg": 6781373200.384,
+        "mem reserved avg": 11933046538.24,
+        "elapsed time": 1593.8458517020044
+      },
+      {
+        "step": 4500,
+        "valid accuracy": 0.44,
+        "train loss": 0.6836657630205154,
+        "train samples": 18000,
+        "train time": 34.69980391602439,
+        "eval time": 12.216164864999882,
+        "tokens / sec": 5989.025197460252,
+        "mem allocated avg": 6776051484.672,
+        "mem reserved avg": 11847029751.808,
+        "elapsed time": 1687.6608126920037
+      },
+      {
+        "step": 4750,
+        "valid accuracy": 0.38,
+        "train loss": 0.6764673949480057,
+        "train samples": 19000,
+        "train time": 35.06901030093286,
+        "eval time": 12.21244175699394,
+        "tokens / sec": 5986.453515467914,
+        "mem allocated avg": 6778213396.48,
+        "mem reserved avg": 11901983522.816,
+        "elapsed time": 1782.2256710229994
+      },
+      {
+        "step": 5000,
+        "valid accuracy": 0.38,
+        "train loss": 0.683658688902855,
+        "train samples": 20000,
+        "train time": 34.891452592011774,
+        "eval time": 12.243604967006831,
+        "tokens / sec": 5969.370276309009,
+        "mem allocated avg": 6775612205.056,
+        "mem reserved avg": 11845553356.8,
+        "elapsed time": 1876.6560215470017
+      },
+      {
+        "step": 5000,
+        "test accuracy": 0.39651250947687644,
+        "train loss": 0.683658688902855,
+        "train samples": 20000,
+        "train total tokens": 4198051
+      }
+    ]
+  },
+  "meta_info": {
+    "model_info": {
+      "sha": "13afe5124825b4f3751f836b40dafda64c1ed062",
+      "created_at": "2024-09-18T15:23:48+00:00"
+    },
+    "dataset_info": {
+      "metamath": {
+        "sha": "aa4f34d3d2d3231299b5b03d9b3e5a20da45aa18",
+        "created_at": "2023-09-21T17:22:46+00:00"
+      },
+      "gsm8k": {
+        "sha": "e53f048856ff4f594e959d75785d2c2d37b678ee",
+        "created_at": "2022-04-12T10:22:10+00:00"
+      }
+    },
+    "package_info": {
+      "transformers-version": "4.57.1",
+      "transformers-commit-hash": null,
+      "peft-version": "0.17.2.dev0",
+      "peft-commit-hash": "a18ba67f242ab2eb74cdabab76ea2fd836b5cd83",
+      "datasets-version": "4.2.0",
+      "datasets-commit-hash": null,
+      "bitsandbytes-version": "0.46.0",
+      "bitsandbytes-commit-hash": null,
+      "torch-version": "2.9.0+cu128",
+      "torch-commit-hash": null
+    },
+    "system_info": {
+      "system": "Linux",
+      "release": "6.14.0-1014-aws",
+      "version": "#14~24.04.1-Ubuntu SMP Tue Sep 23 14:51:14 UTC 2025",
+      "machine": "x86_64",
+      "processor": "x86_64",
+      "accelerator": "NVIDIA L40S"
+    },
+    "pytorch_info": "PyTorch built with:\n  - GCC 13.3\n  - C++ Version: 201703\n  - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 12.8\n  - NVCC architecture flags: -gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_100,code=sm_100;-gencode;arch=compute_120,code=sm_120\n  - CuDNN 90.7.1\n    - Built with CuDNN 90.8\n  - Magma 2.6.1\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=0fabc3ba44823f257e70ce397d989c8de5e362c1, CUDA_VERSION=12.8, CUDNN_VERSION=9.8.0, CXX_COMPILER=/opt/rh/gcc-toolset-13/root/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -DC10_NODEPRECATED -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-dangling-reference -Wno-error=dangling-reference -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, USE_XCCL=OFF, USE_XPU=OFF, \n"
+  }
+}
\ No newline at end of file
diff --git a/method_comparison/MetaMathQA/results/waveft--llama-3.2-3B-n_frequency-5000.json b/method_comparison/MetaMathQA/results/waveft--llama-3.2-3B-n_frequency-5000.json
new file mode 100644
index 0000000000..04e997d6ff
--- /dev/null
+++ b/method_comparison/MetaMathQA/results/waveft--llama-3.2-3B-n_frequency-5000.json
@@ -0,0 +1,358 @@
+{
+  "run_info": {
+    "created_at": "2025-10-23T20:53:30+00:00",
+    "total_time": 3265.5213168810005,
+    "experiment_name": "waveft/llama-3.2-3B-n_frequency-5000",
+    "peft_branch": "main",
+    "train_config": {
+      "model_id": "meta-llama/Llama-3.2-3B",
+      "dtype": "bfloat16",
+      "max_seq_length": 768,
+      "batch_size": 4,
+      "batch_size_eval": 50,
+      "max_steps": 5000,
+      "eval_steps": 250,
+      "compile": false,
+      "query_template": "Question: {query} Think step by step.\nAnswer:",
+      "seed": 0,
+      "grad_norm_clip": 1.0,
+      "optimizer_type": "AdamW",
+      "optimizer_kwargs": {
+        "lr": 0.0001,
+        "weight_decay": 0.1
+      },
+      "lr_scheduler": "cosine",
+      "use_amp": false,
+      "autocast_adapter_dtype": true,
+      "generation_kwargs": {
+        "max_length": 800,
+        "max_new_tokens": 300
+      },
+      "attn_implementation": null
+    },
+    "peft_config": {
+      "task_type": "CAUSAL_LM",
+      "peft_type": "WAVEFT",
+      "auto_mapping": null,
+      "peft_version": "0.17.2.dev0@UNKNOWN",
+      "base_model_name_or_path": "meta-llama/Llama-3.2-3B",
+      "revision": null,
+      "inference_mode": false,
+      "n_frequency": 5000,
+      "scaling": 25.0,
+      "wavelet_family": "db1",
+      "use_idwt": true,
+      "random_loc_seed": 777,
+      "fan_in_fan_out": false,
+      "target_modules": [
+        "q_proj",
+        "v_proj"
+      ],
+      "exclude_modules": null,
+      "bias": "none",
+      "modules_to_save": null,
+      "layers_to_transform": null,
+      "layers_pattern": null,
+      "n_frequency_pattern": {},
+      "proportional_parameters": false,
+      "init_weights": true
+    },
+    "error_msg": ""
+  },
+  "train_info": {
+    "accelerator_memory_reserved_avg": 14582950762,
+    "accelerator_memory_max": 24914165760,
+    "accelerator_memory_reserved_99th": 20564693483,
+    "train_time": 2783.132204494017,
+    "file_size": 1127304,
+    "num_trainable_params": 280000,
+    "num_total_params": 3213029824,
+    "status": "success",
+    "metrics": [
+      {
+        "step": 250,
+        "valid accuracy": 0.34,
+        "train loss": 1.057445770263672,
+        "train samples": 1000,
+        "train time": 66.99218972800008,
+        "eval time": 29.364740246994188,
+        "tokens / sec": 3160.3534809000257,
+        "mem allocated avg": 6784647395.328,
+        "mem reserved avg": 14636350963.712,
+        "elapsed time": 149.00784370000474
+      },
+      {
+        "step": 500,
+        "valid accuracy": 0.34,
+        "train loss": 0.7512865424156189,
+        "train samples": 2000,
+        "train time": 65.21943290103809,
+        "eval time": 29.0264903579955,
+        "tokens / sec": 3189.156831762169,
+        "mem allocated avg": 6776712450.048,
+        "mem reserved avg": 14526627971.072,
+        "elapsed time": 283.5782854230056
+      },
+      {
+        "step": 750,
+        "valid accuracy": 0.34,
+        "train loss": 0.6967935096025467,
+        "train samples": 3000,
+        "train time": 66.11754105806904,
+        "eval time": 22.38963912300096,
+        "tokens / sec": 3242.724949672555,
+        "mem allocated avg": 6786416594.944,
+        "mem reserved avg": 14687840239.616,
+        "elapsed time": 412.45540837700537
+      },
+      {
+        "step": 1000,
+        "valid accuracy": 0.36,
+        "train loss": 0.6762894586324691,
+        "train samples": 4000,
+        "train time": 65.32631866908923,
+        "eval time": 22.478863092997926,
+        "tokens / sec": 3189.1587379250154,
+        "mem allocated avg": 6778948349.952,
+        "mem reserved avg": 14555082129.408,
+        "elapsed time": 540.8314235460057
+      },
+      {
+        "step": 1250,
+        "valid accuracy": 0.38,
+        "train loss": 0.6724052220582962,
+        "train samples": 5000,
+        "train time": 65.4501353219166,
+        "eval time": 29.181654504995095,
+        "tokens / sec": 3186.2118997051034,
+        "mem allocated avg": 6778097270.784,
+        "mem reserved avg": 14548866170.88,
+        "elapsed time": 675.7093939700062
+      },
+      {
+        "step": 1500,
+        "valid accuracy": 0.48,
+        "train loss": 0.6683271112442016,
+        "train samples": 6000,
+        "train time": 65.66567935897183,
+        "eval time": 19.425667663999775,
+        "tokens / sec": 3187.8296553616533,
+        "mem allocated avg": 6779162421.248,
+        "mem reserved avg": 14573251854.336,
+        "elapsed time": 801.0151559639999
+      },
+      {
+        "step": 1750,
+        "valid accuracy": 0.54,
+        "train loss": 0.6589902213811875,
+        "train samples": 7000,
+        "train time": 65.90766002406599,
+        "eval time": 24.991644790003193,
+        "tokens / sec": 3176.48965118098,
+        "mem allocated avg": 6781186451.456,
+        "mem reserved avg": 14585163677.696,
+        "elapsed time": 932.4019877410028
+      },
+      {
+        "step": 2000,
+        "valid accuracy": 0.42,
+        "train loss": 0.6641829339265823,
+        "train samples": 8000,
+        "train time": 65.52569843604579,
+        "eval time": 29.105937477994303,
+        "tokens / sec": 3169.6876944045835,
+        "mem allocated avg": 6777191518.208,
+        "mem reserved avg": 14533355634.688,
+        "elapsed time": 1067.1376723650028
+      },
+      {
+        "step": 2250,
+        "valid accuracy": 0.4,
+        "train loss": 0.6568171486854554,
+        "train samples": 9000,
+        "train time": 66.60324803898402,
+        "eval time": 18.03150882799673,
+        "tokens / sec": 3227.290054595945,
+        "mem allocated avg": 6789178621.952,
+        "mem reserved avg": 14712368529.408,
+        "elapsed time": 1192.7977026480003
+      },
+      {
+        "step": 2500,
+        "valid accuracy": 0.42,
+        "train loss": 0.6552880892753601,
+        "train samples": 10000,
+        "train time": 64.93178476598405,
+        "eval time": 29.03553620800085,
+        "tokens / sec": 3172.0520349519234,
+        "mem allocated avg": 6774276726.784,
+        "mem reserved avg": 14475549736.96,
+        "elapsed time": 1327.1052960740053
+      },
+      {
+        "step": 2750,
+        "valid accuracy": 0.42,
+        "train loss": 0.6487538056373596,
+        "train samples": 11000,
+        "train time": 66.10884880107187,
+        "eval time": 20.587333617004333,
+        "tokens / sec": 3205.0323646925253,
+        "mem allocated avg": 6784980387.84,
+        "mem reserved avg": 14651936997.376,
+        "elapsed time": 1454.7400554460037
+      },
+      {
+        "step": 3000,
+        "valid accuracy": 0.4,
+        "train loss": 0.6414109219312668,
+        "train samples": 12000,
+        "train time": 65.51606800403533,
+        "eval time": 17.688484279002296,
+        "tokens / sec": 3185.951269040499,
+        "mem allocated avg": 6780026255.36,
+        "mem reserved avg": 14562925477.888,
+        "elapsed time": 1578.3467425210038
+      },
+      {
+        "step": 3250,
+        "valid accuracy": 0.4,
+        "train loss": 0.6511869001388549,
+        "train samples": 13000,
+        "train time": 65.77464669098845,
+        "eval time": 22.121913303002657,
+        "tokens / sec": 3206.4178313388766,
+        "mem allocated avg": 6781515575.296,
+        "mem reserved avg": 14588720447.488,
+        "elapsed time": 1706.5400248380029
+      },
+      {
+        "step": 3500,
+        "valid accuracy": 0.46,
+        "train loss": 0.637642817735672,
+        "train samples": 14000,
+        "train time": 65.76092355793662,
+        "eval time": 29.041672920000565,
+        "tokens / sec": 3189.584158063204,
+        "mem allocated avg": 6779834134.528,
+        "mem reserved avg": 14574015217.664,
+        "elapsed time": 1842.0509184040056
+      },
+      {
+        "step": 3750,
+        "valid accuracy": 0.42,
+        "train loss": 0.6350828701257706,
+        "train samples": 15000,
+        "train time": 66.55924862711254,
+        "eval time": 20.789683652998065,
+        "tokens / sec": 3255.790960232193,
+        "mem allocated avg": 6791231805.44,
+        "mem reserved avg": 14752080199.68,
+        "elapsed time": 1970.2113445850046
+      },
+      {
+        "step": 4000,
+        "valid accuracy": 0.38,
+        "train loss": 0.65046697640419,
+        "train samples": 16000,
+        "train time": 65.04778776894818,
+        "eval time": 19.624021877003543,
+        "tokens / sec": 3141.890093571505,
+        "mem allocated avg": 6772911845.376,
+        "mem reserved avg": 14460534128.64,
+        "elapsed time": 2095.6094995790045
+      },
+      {
+        "step": 4250,
+        "valid accuracy": 0.42,
+        "train loss": 0.6331748945713043,
+        "train samples": 17000,
+        "train time": 65.85189565200562,
+        "eval time": 23.701296111001284,
+        "tokens / sec": 3210.067043735313,
+        "mem allocated avg": 6782308450.304,
+        "mem reserved avg": 14607057944.576,
+        "elapsed time": 2225.6684009890014
+      },
+      {
+        "step": 4500,
+        "valid accuracy": 0.4,
+        "train loss": 0.641278461933136,
+        "train samples": 18000,
+        "train time": 65.11867782095214,
+        "eval time": 23.630613847002678,
+        "tokens / sec": 3191.3731505944966,
+        "mem allocated avg": 6778411657.216,
+        "mem reserved avg": 14525831053.312,
+        "elapsed time": 2354.2676229330027
+      },
+      {
+        "step": 4750,
+        "valid accuracy": 0.4,
+        "train loss": 0.6345745379924774,
+        "train samples": 19000,
+        "train time": 65.44978067000193,
+        "eval time": 23.75194463099615,
+        "tokens / sec": 3207.634889695251,
+        "mem allocated avg": 6780527521.792,
+        "mem reserved avg": 14582739369.984,
+        "elapsed time": 2484.0077965070013
+      },
+      {
+        "step": 5000,
+        "valid accuracy": 0.44,
+        "train loss": 0.6398445825576782,
+        "train samples": 20000,
+        "train time": 65.40377733100468,
+        "eval time": 19.663959343997703,
+        "tokens / sec": 3184.5255503502062,
+        "mem allocated avg": 6777134090.24,
+        "mem reserved avg": 14518717513.728,
+        "elapsed time": 2609.523235476001
+      },
+      {
+        "step": 5000,
+        "test accuracy": 0.4162244124336619,
+        "train loss": 0.6398445825576782,
+        "train samples": 20000,
+        "train total tokens": 4198051
+      }
+    ]
+  },
+  "meta_info": {
+    "model_info": {
+      "sha": "13afe5124825b4f3751f836b40dafda64c1ed062",
+      "created_at": "2024-09-18T15:23:48+00:00"
+    },
+    "dataset_info": {
+      "metamath": {
+        "sha": "aa4f34d3d2d3231299b5b03d9b3e5a20da45aa18",
+        "created_at": "2023-09-21T17:22:46+00:00"
+      },
+      "gsm8k": {
+        "sha": "e53f048856ff4f594e959d75785d2c2d37b678ee",
+        "created_at": "2022-04-12T10:22:10+00:00"
+      }
+    },
+    "package_info": {
+      "transformers-version": "4.57.1",
+      "transformers-commit-hash": null,
+      "peft-version": "0.17.2.dev0",
+      "peft-commit-hash": "a18ba67f242ab2eb74cdabab76ea2fd836b5cd83",
+      "datasets-version": "4.2.0",
+      "datasets-commit-hash": null,
+      "bitsandbytes-version": "0.46.0",
+      "bitsandbytes-commit-hash": null,
+      "torch-version": "2.9.0+cu128",
+      "torch-commit-hash": null
+    },
+    "system_info": {
+      "system": "Linux",
+      "release": "6.14.0-1014-aws",
+      "version": "#14~24.04.1-Ubuntu SMP Tue Sep 23 14:51:14 UTC 2025",
+      "machine": "x86_64",
+      "processor": "x86_64",
+      "accelerator": "NVIDIA L40S"
+    },
+    "pytorch_info": "PyTorch built with:\n  - GCC 13.3\n  - C++ Version: 201703\n  - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 12.8\n  - NVCC architecture flags: -gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_100,code=sm_100;-gencode;arch=compute_120,code=sm_120\n  - CuDNN 90.7.1\n    - Built with CuDNN 90.8\n  - Magma 2.6.1\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=0fabc3ba44823f257e70ce397d989c8de5e362c1, CUDA_VERSION=12.8, CUDNN_VERSION=9.8.0, CXX_COMPILER=/opt/rh/gcc-toolset-13/root/usr/bin/c++, CXX_FLAGS= -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -DC10_NODEPRECATED -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-dangling-reference -Wno-error=dangling-reference -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, USE_XCCL=OFF, USE_XPU=OFF, \n"
+  }
+}
\ No newline at end of file