diff --git a/NAMESPACE b/NAMESPACE index 00b6170..8878dc0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,10 @@ export(get_hierarchical_clusters) export(get_robust_colocalization) export(get_robust_ucos) export(get_ucos_summary) +importFrom(Rfast,correls) +importFrom(Rfast,med) +importFrom(Rfast,standardise) +importFrom(Rfast,upper_tri) importFrom(grDevices,adjustcolor) importFrom(graphics,abline) importFrom(graphics,axis) diff --git a/R/colocboost.R b/R/colocboost.R index 3f9f860..02e3005 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -22,12 +22,20 @@ #' \code{variant} is required if sumstat for different outcomes do not have the same number of variables. #' \code{var_y} is the variance of phenotype (default is 1 meaning that the Y is in the \dQuote{standardized} scale). #' @param LD A list of correlation matrix indicating the LD matrix for each genotype. It also could be a single matrix if all sumstats were -#' obtained from the same genotypes. +#' obtained from the same genotypes. Provide either \code{LD} or \code{X_ref}, not both. +#' If neither is provided, LD-free mode is used. +#' @param X_ref A reference panel genotype matrix (N_ref x P) or a list of matrices, as an alternative to providing a precomputed +#' \code{LD} matrix. Column names must include variant names matching those in \code{sumstat}. +#' When the number of reference panel samples is less than the number of variants (N_ref < P), +#' this avoids storing the full P x P LD matrix and reduces memory usage. +#' When N_ref >= P, LD is precomputed from \code{X_ref} internally. +#' Provide either \code{LD} or \code{X_ref}, not both. If neither is provided, LD-free mode is used. #' @param dict_YX A L by 2 matrix of dictionary for \code{X} and \code{Y} if there exist subsets of outcomes corresponding to the same X matrix. #' The first column should be 1:L for L outcomes. The second column should be the index of \code{X} corresponding to the outcome. 
#' The innovation: do not provide the same matrix in \code{X} to reduce the computational burden. -#' @param dict_sumstatLD A L by 2 matrix of dictionary for \code{sumstat} and \code{LD} if there exist subsets of outcomes corresponding to the same sumstat. -#' The first column should be 1:L for L sumstat The second column should be the index of \code{LD} corresponding to the sumstat. +#' @param dict_sumstatLD A L by 2 matrix of dictionary for \code{sumstat} and \code{LD} (or \code{X_ref}) if there exist subsets of outcomes +#' corresponding to the same sumstat. +#' The first column should be 1:L for L sumstat. The second column should be the index of \code{LD} (or \code{X_ref}) corresponding to the sumstat. #' The innovation: do not provide the same matrix in \code{LD} to reduce the computational burden. #' @param outcome_names The names of outcomes, which has the same order for Y. #' @param focal_outcome_idx The index of the focal outcome if perform GWAS-xQTL ColocBoost @@ -129,9 +137,10 @@ #' #' @family colocboost #' @importFrom stats na.omit +#' @importFrom Rfast correls standardise upper_tri med #' @export colocboost <- function(X = NULL, Y = NULL, # individual data - sumstat = NULL, LD = NULL, # summary statistics: either Z, bhat, sebhat, N, var_Y, + sumstat = NULL, LD = NULL, X_ref = NULL, # summary statistics: either Z, bhat, sebhat, N, var_Y, ###### - index dict for X match multiple Y / LD match multiple sumstat dict_YX = NULL, # Y index for 1st column, X index for 2nd column dict_sumstatLD = NULL, # sumstat index for 1st column, LD index for 2nd column @@ -202,11 +211,15 @@ colocboost <- function(X = NULL, Y = NULL, # individual data warning("Error: No individual data (X, Y) or summary statistics (sumstat) or (effect_est, effect_se) are provided! 
Please check!") return(NULL) } + if (!is.null(LD) && !is.null(X_ref)) { + warning("Error: Provide either LD or X_ref, not both.") + return(NULL) + } # - check input data: individual level data and summary-level data validated_data <- colocboost_validate_input_data( X = X, Y = Y, - sumstat = sumstat, LD = LD, + sumstat = sumstat, LD = LD, X_ref = X_ref, dict_YX = dict_YX, dict_sumstatLD = dict_sumstatLD, effect_est = effect_est, effect_se = effect_se, effect_n = effect_n, overlap_variables = overlap_variables, @@ -227,6 +240,8 @@ colocboost <- function(X = NULL, Y = NULL, # individual data keep_variable_individual <- validated_data$keep_variable_individual sumstat <- validated_data$sumstat LD <- validated_data$LD + X_ref <- validated_data$X_ref + ref_label <- validated_data$ref_label sumstatLD_dict <- validated_data$sumstatLD_dict keep_variable_sumstat <- validated_data$keep_variable_sumstat Z <- validated_data$Z @@ -277,7 +292,8 @@ colocboost <- function(X = NULL, Y = NULL, # individual data } cb_data <- colocboost_init_data( X = X, Y = Y, dict_YX = yx_dict, - Z = Z, LD = LD, N_sumstat = N_sumstat, dict_sumstatLD = sumstatLD_dict, + Z = Z, LD = LD, X_ref = X_ref, ref_label = ref_label, + N_sumstat = N_sumstat, dict_sumstatLD = sumstatLD_dict, Var_y = Var_y, SeBhat = SeBhat, keep_variables = keep_variables, focal_outcome_idx = focal_outcome_idx, @@ -377,7 +393,15 @@ colocboost <- function(X = NULL, Y = NULL, # individual data #' @param X A list of genotype matrices for different outcomes, or a single matrix if all outcomes share the same genotypes. #' @param Y A list of vectors of outcomes or an N by L matrix if it is considered for the same X and multiple outcomes. #' @param sumstat A list of data.frames of summary statistics. -#' @param LD A list of correlation matrices indicating the LD matrix for each genotype. +#' @param LD A list of correlation matrix indicating the LD matrix for each genotype. 
It also could be a single matrix if all sumstats were +#' obtained from the same genotypes. Provide either \code{LD} or \code{X_ref}, not both. +#' If neither is provided, LD-free mode is used. +#' @param X_ref A reference panel genotype matrix (N_ref x P) or a list of matrices, as an alternative to providing a precomputed +#' \code{LD} matrix. Column names must include variant names matching those in \code{sumstat}. +#' When the number of reference panel samples is less than the number of variants (N_ref < P), +#' this avoids storing the full P x P LD matrix and reduces memory usage. +#' When N_ref >= P, LD is precomputed from \code{X_ref} internally. +#' Provide either \code{LD} or \code{X_ref}, not both. If neither is provided, LD-free mode is used. #' @param dict_YX A L by 2 matrix of dictionary for X and Y if there exist subsets of outcomes corresponding to the same X matrix. #' @param dict_sumstatLD A L by 2 matrix of dictionary for sumstat and LD if there exist subsets of outcomes corresponding to the same sumstat. #' @param effect_est Matrix of variable regression coefficients (i.e. 
regression beta values) in the genomic region @@ -394,6 +418,8 @@ colocboost <- function(X = NULL, Y = NULL, # individual data #' \item{keep_variable_individual}{List of variable names for each X matrix} #' \item{sumstat}{Processed list of summary statistics data.frames} #' \item{LD}{Processed list of LD matrices} +#' \item{X_ref}{Processed list of reference genotype matrices} +#' \item{ref_label}{Type of reference provided: \dQuote{LD}, \dQuote{X_ref}, or \dQuote{No_ref}} #' \item{sumstatLD_dict}{Dictionary mapping sumstat to LD} #' \item{keep_variable_sumstat}{List of variant names for each sumstat} #' \item{Z}{List of z-scores for each outcome} @@ -408,7 +434,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data #' #' @keywords internal colocboost_validate_input_data <- function(X = NULL, Y = NULL, - sumstat = NULL, LD = NULL, + sumstat = NULL, LD = NULL, X_ref = NULL, dict_YX = NULL, dict_sumstatLD = NULL, effect_est = NULL, effect_se = NULL, effect_n = NULL, overlap_variables = FALSE, @@ -418,7 +444,7 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, cos_npc_cutoff = 0.2, npc_outcome_cutoff = 0.2) { - # - check individual level data + ############### Check individual level data ########################### if (!is.null(X) & !is.null(Y)) { # --- check input if (is.data.frame(X)) X <- as.matrix(X) @@ -557,8 +583,9 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, } else { yx_dict <- keep_variable_individual <- NULL } + ############### Done! Check individual level data ########################### - # - check summary-level data + ############### Check summary level data ########################### if ((!is.null(sumstat)) | (!is.null(effect_est) & !is.null(effect_se))) { # --- check input of (effect_est, effect_se) if ((!is.null(effect_est) & !is.null(effect_se))) { @@ -649,8 +676,9 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, } return(xx) }) + ############### Done! 
Check summary level data ########################### - # --- check input of LD + ############### Check input of LD or reference X_ref ########################### M_updated <- M min_abs_corr_updated <- min_abs_corr jk_equiv_corr_updated <- jk_equiv_corr @@ -659,10 +687,13 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, cos_npc_cutoff_updated <- cos_npc_cutoff npc_outcome_cutoff_updated <- npc_outcome_cutoff - if (is.null(LD)) { + # --- Handle X_ref: convert to LD when N_ref >= P, or keep for on-the-fly computation + + + if (is.null(LD) && is.null(X_ref)) { # if no LD input, set diagonal matrix to LD warning( - "Providing the LD for summary statistics data is highly recommended. ", + "Providing the LD or X_ref for summary statistics data is highly recommended. ", "Without LD, only a single iteration will be performed under the assumption of one causal variable per outcome. ", "Additionally, the purity of CoS cannot be evaluated!" ) @@ -677,61 +708,98 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, func_simplex_updated <- "only_z2z" cos_npc_cutoff_updated <- 0 npc_outcome_cutoff_updated <- 0 + ref_label <- "No_ref" } else { - if (is.data.frame(LD)) LD <- as.matrix(LD) - if (is.matrix(LD)) LD <- list(LD) - # - check if NA in LD matrix - num_na <- sapply(LD, sum) - if (any(is.na(num_na))){ - warning("Error: Input LD must not contain missing values (NA).") - return(NULL) + # -- Determine reference list and variant extraction for LD or X_ref + if (!is.null(LD)){ + + if (is.data.frame(LD)) LD <- as.matrix(LD) + if (is.matrix(LD)) LD <- list(LD) + # - check if NA in LD matrix + num_na <- sapply(LD, sum) + if (any(is.na(num_na))){ + warning("Error: Input LD must not contain missing values (NA).") + return(NULL) + } + ref_list <- LD + ref_label <- "LD" + + } else { + if (is.data.frame(X_ref)) X_ref <- as.matrix(X_ref) + if (is.matrix(X_ref)) X_ref <- list(X_ref) + + # When N_ref >= P, precompute LD (avoids repeated O(N_ref*P) in boosting 
loop) + # When N_ref < P, keep X_ref for on-the-fly computation (avoids P*P memory) + all_large <- all(sapply(X_ref, function(xr) nrow(xr) >= ncol(xr))) + if (all_large) { + message("N_ref >= P: precomputing LD from X_ref.") + LD <- lapply(X_ref, function(xr) { + ld <- get_cormat(xr) + rownames(ld) <- colnames(ld) <- colnames(xr) + ld + }) + X_ref <- NULL + ref_list <- LD + ref_label <- "LD" + } else { + # N_ref < P: standardize and keep for on-the-fly crossprod/(N_ref-1) + for (idx in seq_along(X_ref)) { + X_ref[[idx]] <- standardise(X_ref[[idx]], center = TRUE, scale = TRUE) + X_ref[[idx]][which(is.na(X_ref[[idx]]))] <- 0 + } + ref_list <- X_ref + ref_label <- "X_ref" + } } - # Create sumstat-LD mapping === - if (length(LD) == 1) { + + # -- Create sumstat - LD/X_ref mapping === + if (length(ref_list) == 1) { sumstatLD_dict <- rep(1, length(sumstat)) - } else if (length(LD) == length(sumstat)) { + } else if (length(ref_list) == length(sumstat)) { sumstatLD_dict <- seq_along(sumstat) } else { if (is.null(dict_sumstatLD)) { warning('Error: Please provide dict_sumstatLD: you have ', length(sumstat), - ' sumstats but only ', length(LD), ' LD matrices') + ' sumstats but only ', length(ref_list), ' ', ref_label, ' matrices') return(NULL) } else { - # - dict for sumstat to LD mapping + # - dict for sumstat to LD/X_ref mapping sumstatLD_dict <- rep(NA, length(sumstat)) for (i in 1:length(sumstat)) { tmp <- unique(dict_sumstatLD[dict_sumstatLD[, 1] == i, 2]) if (length(tmp) == 0) { - warning(paste("Error: You don't provide matched LD for sumstat", i)) + warning("Error: You don't provide matched ", ref_label, " for sumstat ", i) return(NULL) } else if (length(tmp) != 1) { - warning(paste("Error: You provide multiple matched LD for sumstat", i)) + warning("Error: You provide multiple matched ", ref_label, " for sumstat ", i) return(NULL) } else { sumstatLD_dict[i] <- tmp } } - if (max(sumstatLD_dict) > length(LD)) { - warning("Error: You don't provide enough LD matrices!") + if 
(max(sumstatLD_dict) > length(ref_list)) { + warning("Error: You don't provide enough ", ref_label, " matrices!") return(NULL) } } } - # === Filter variants for each sumstat === + # -- Filter variants for each sumstat + get_ref_variants <- function(ref_mat) colnames(ref_mat) + ref_ncol <- function(ref_mat) ncol(ref_mat) for (i in seq_along(sumstat)) { # Get sumstat variants (adjust field name based on your data structure) sumstat_variants <- sumstat[[i]]$variant n_total <- length(sumstat_variants) - # Get LD variants + # Get LD/X_ref variants ld_idx <- sumstatLD_dict[i] - current_ld <- LD[[ld_idx]] - ld_variants <- rownames(current_ld) + current_ref <- ref_list[[ld_idx]] + ld_variants <- get_ref_variants(current_ref) if (is.null(ld_variants)) { - if (ncol(current_ld) != n_total){ - warning('Error: LD matrix ', ld_idx, ' has no rownames. Please ensure all LD matrices have variant names as rownames.') + if (ncol(current_ref) != n_total){ + warning('Error: ', ref_label, ' matrix ', ld_idx, ' has no variant names. Please ensure all ', ref_label, ' matrices have variant names.') return(NULL) } } @@ -755,6 +823,7 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, keep_variable_sumstat <- lapply(sumstat, function(xx) { xx$variant }) + ############### Done! 
Check input of LD or reference X_ref ########################### # - checking sample size existency n_exist <- sapply(sumstat, function(ss) { @@ -834,7 +903,7 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, Z[[i.summstat]] <- z } } else { - Z <- N_sumstat <- Var_y <- SeBhat <- sumstatLD_dict <- keep_variable_sumstat <- NULL + Z <- N_sumstat <- Var_y <- SeBhat <- sumstatLD_dict <- keep_variable_sumstat <- X_ref <- ref_label <- NULL M_updated <- M min_abs_corr_updated <- min_abs_corr jk_equiv_corr_updated <- jk_equiv_corr @@ -851,6 +920,8 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, keep_variable_individual = keep_variable_individual, sumstat = sumstat, LD = LD, + X_ref = X_ref, + ref_label = ref_label, sumstatLD_dict = sumstatLD_dict, keep_variable_sumstat = keep_variable_sumstat, Z = Z, diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R index 94cbbe5..46e1d52 100644 --- a/R/colocboost_assemble.R +++ b/R/colocboost_assemble.R @@ -132,10 +132,13 @@ colocboost_assemble <- function(cb_obj, } } if (!is.null(cb_obj_single$cb_data$data[[1]][["XtY"]])) { + X_dict <- cb_obj$cb_data$dict[i] if (is.null(cb_obj_single$cb_data$data[[1]]$XtX)) { - X_dict <- cb_obj$cb_data$dict[i] cb_obj_single$cb_data$data[[1]]$XtX <- cb_obj$cb_data$data[[X_dict]]$XtX } + if (is.null(cb_obj_single$cb_data$data[[1]]$ref_label)) { + cb_obj_single$cb_data$data[[1]]$ref_label <- cb_obj$cb_data$data[[X_dict]]$ref_label + } } class(cb_obj_single) <- "colocboost" out_ucos_each <- colocboost_assemble_ucos(cb_obj_single, diff --git a/R/colocboost_assemble_cos.R b/R/colocboost_assemble_cos.R index 6a1243a..00a052c 100644 --- a/R/colocboost_assemble_cos.R +++ b/R/colocboost_assemble_cos.R @@ -43,7 +43,8 @@ colocboost_assemble_cos <- function(cb_obj, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, N = cb_data$data[[coloc_outcomes[iiii]]]$N, n = n_purity, coverage = sec_coverage_thresh, min_abs_corr = min_abs_corr, median_abs_corr = 
median_abs_corr, - miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss + miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss, + ref_label = cb_data$data[[X_dict]]$ref_label ) check_purity[iiii] <- length(tmp) == 1 } @@ -83,7 +84,8 @@ colocboost_assemble_cos <- function(cb_obj, p_tmp <- matrix(get_purity(pos, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, - N = cb_data$data[[i]]$N, n = n_purity + N = cb_data$data[[i]]$N, n = n_purity, + ref_label = cb_data$data[[X_dict]]$ref_label ), 1, 3) purity <- c(purity, list(p_tmp)) } @@ -148,7 +150,8 @@ colocboost_assemble_cos <- function(cb_obj, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, N = cb_data$data[[coloc_outcomes[iiii]]]$N, n = n_purity, coverage = sec_coverage_thresh, min_abs_corr = min_abs_corr, median_abs_corr = median_abs_corr, - miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss + miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss, + ref_label = cb_data$data[[X_dict]]$ref_label ) check_purity[iiii] <- length(tmp) == 1 } @@ -208,7 +211,8 @@ colocboost_assemble_cos <- function(cb_obj, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, N = cb_data$data[[coloc_outcomes[iiii]]]$N, n = n_purity, coverage = sec_coverage_thresh, min_abs_corr = min_abs_corr, median_abs_corr = median_abs_corr, - miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss + miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss, + ref_label = cb_data$data[[X_dict]]$ref_label ) check_purity[[iiii]] <- tmp } @@ -351,7 +355,8 @@ colocboost_assemble_cos <- function(cb_obj, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, miss_idx = cb_data$data[[i]]$variable_miss, - P = cb_model_para$P + P = cb_model_para$P, + ref_label = cb_data$data[[X_dict]]$ref_label ) } res <- Reduce(pmax, res) @@ -438,7 +443,8 @@ colocboost_assemble_cos <- function(cb_obj, tmp <- matrix(get_purity(pos, X = cb_data$data[[X_dict]]$X, Xcorr = 
cb_data$data[[X_dict]]$XtX, - N = cb_data$data[[i3]]$N, n = n_purity + N = cb_data$data[[i3]]$N, n = n_purity, + ref_label = cb_data$data[[X_dict]]$ref_label ), 1, 3) p_tmp <- rbind(p_tmp, tmp) } diff --git a/R/colocboost_assemble_ucos.R b/R/colocboost_assemble_ucos.R index 3336f19..2b44f54 100644 --- a/R/colocboost_assemble_ucos.R +++ b/R/colocboost_assemble_ucos.R @@ -64,7 +64,8 @@ colocboost_assemble_ucos <- function(cb_obj_single, } purity <- matrix(get_purity(pos, X = cb_data$data[[1]]$X, Xcorr = cb_data$data[[1]]$XtX, - N = cb_data$data[[1]]$N, n = n_purity + N = cb_data$data[[1]]$N, n = n_purity, + ref_label = cb_data$data[[1]]$ref_label ), 1, 3) purity <- as.data.frame(purity) colnames(purity) <- c("min_abs_corr", "mean_abs_corr", "median_abs_corr") @@ -131,7 +132,8 @@ colocboost_assemble_ucos <- function(cb_obj_single, X = cb_data$data[[1]]$X, Xcorr = cb_data$data[[1]]$XtX, N = cb_data$data[[1]]$N, n = n_purity, coverage = coverage, min_abs_corr = min_abs_corr, median_abs_corr = median_abs_corr, - miss_idx = cb_data$data[[1]]$variable_miss + miss_idx = cb_data$data[[1]]$variable_miss, + ref_label = cb_data$data[[1]]$ref_label ) if (length(check_purity) != 0) { w <- w[check_purity] @@ -237,7 +239,8 @@ colocboost_assemble_ucos <- function(cb_obj_single, X = cb_data$data[[1]]$X, Xcorr = cb_data$data[[1]]$XtX, miss_idx = cb_data$data[[1]]$variable_miss, - P = cb_model_para$P + P = cb_model_para$P, + ref_label = cb_data$data[[1]]$ref_label ) min_between[i.between, j.between] <- min_between[j.between, i.between] <- res[1] max_between[i.between, j.between] <- max_between[j.between, i.between] <- res[2] @@ -301,7 +304,8 @@ colocboost_assemble_ucos <- function(cb_obj_single, purity, matrix(get_purity(pos, X = cb_data$data[[1]]$X, Xcorr = cb_data$data[[1]]$XtX, - N = cb_data$data[[1]]$N, n = n_purity + N = cb_data$data[[1]]$N, n = n_purity, + ref_label = cb_data$data[[1]]$ref_label ), 1, 3) ) } diff --git a/R/colocboost_check_update_jk.R 
b/R/colocboost_check_update_jk.R index d449b03..57ebbc3 100644 --- a/R/colocboost_check_update_jk.R +++ b/R/colocboost_check_update_jk.R @@ -122,7 +122,9 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { YtY = data_update[[ii]]$YtY, XtY = data_update[[ii]]$XtY, beta_k = model_update[[ii]]$beta, - miss_idx = data_update[[ii]]$variable_miss + miss_idx = data_update[[ii]]$variable_miss, + ref_label = cb_data$data[[X_dict[ii]]]$ref_label, + XtX_beta_cache = model_update[[ii]]$XtX_beta_cache ) }) change_res_each <- as.numeric(unlist(change_res_each)) @@ -197,7 +199,9 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { YtY = data_update[[ii]]$YtY, XtY = data_update[[ii]]$XtY, beta_k = model_update[[ii]]$beta, - miss_idx = data_update[[ii]]$variable_miss + miss_idx = data_update[[ii]]$variable_miss, + ref_label = cb_data$data[[X_dict[ii]]]$ref_label, + XtX_beta_cache = model_update[[ii]]$XtX_beta_cache ) }) change_res_each <- as.numeric(unlist(change_res_each)) @@ -241,7 +245,9 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { YtY = data_update[[ii]]$YtY, XtY = data_update[[ii]]$XtY, beta_k = model_update[[ii]]$beta, - miss_idx = data_update[[ii]]$variable_miss + miss_idx = data_update[[ii]]$variable_miss, + ref_label = cb_data$data[[X_dict[ii]]]$ref_label, + XtX_beta_cache = model_update[[ii]]$XtX_beta_cache ) }) change_res_each <- as.numeric(unlist(change_res_each)) @@ -368,7 +374,8 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data, X = cb_data$data[[X_dict[pp_focal]]]$X, XtX = cb_data$data[[X_dict[pp_focal]]]$XtX, N = cb_data$data[[X_dict[pp_focal]]]$N, - remain_jk = 1:cb_model_para$P + remain_jk = 1:cb_model_para$P, + ref_label = cb_data$data[[X_dict[pp_focal]]]$ref_label ) }) # ----- second, if within the same LD buddies, select the following variants @@ -379,7 +386,8 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data, # ----- 
third, if picked variant within the same LD buddies ld_tmp <- get_LD_jk1_jk2(jk_focal, jk_focal_tmp, XtX = cb_data$data[[X_dict[pp_focal]]]$XtX, - remain_jk = 1:cb_model_para$P + remain_jk = 1:cb_model_para$P, + ref_label = cb_data$data[[X_dict[pp_focal]]]$ref_label ) if (ld_tmp > jk_equiv_corr) { jk_focal <- jk_focal_tmp @@ -491,7 +499,7 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data, #' @importFrom stats cor get_LD_jk1_jk2 <- function(jk1, jk2, X = NULL, XtX = NULL, N = NULL, - remain_jk = NULL) { + remain_jk = NULL, ref_label = "LD") { if (!is.null(X)) { LD_temp <- suppressWarnings({ get_cormat(X[, c(jk1, jk2)]) @@ -499,12 +507,18 @@ get_LD_jk1_jk2 <- function(jk1, jk2, LD_temp[which(is.na(LD_temp))] <- 0 LD_temp <- LD_temp[1, 2] } else if (!is.null(XtX)) { - if (length(XtX) == 1){ + if (identical(ref_label, "No_ref")) { LD_temp <- 0 } else { jk1.remain <- match(jk1, remain_jk) jk2.remain <- match(jk2, remain_jk) - LD_temp <- XtX[jk1.remain, jk2.remain] + if (identical(ref_label, "X_ref")) { + LD_temp <- suppressWarnings({ get_cormat(XtX[, c(jk1.remain, jk2.remain)]) }) + LD_temp[which(is.na(LD_temp))] <- 0 + LD_temp <- LD_temp[1, 2] + } else { + LD_temp <- XtX[jk1.remain, jk2.remain] + } } } return(LD_temp) @@ -528,7 +542,8 @@ check_jk_jkeach <- function(jk, jk_each, X = cb_data$data[[X_dict[i]]]$X, XtX = cb_data$data[[X_dict[i]]]$XtX, N = data_update[[i]]$N, - remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss) + remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss), + ref_label = cb_data$data[[X_dict[i]]]$ref_label ) judge[i] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr) } else { @@ -549,7 +564,7 @@ check_pair_jkeach <- function(jk_each, #' @importFrom stats cor get_LD_jk_each <- function(jk_each, X = NULL, XtX = NULL, N = NULL, - remain_jk = NULL) { + remain_jk = NULL, ref_label = "LD") { if (!is.null(X)) { LD_temp <- suppressWarnings({ 
get_cormat(X[, jk_each]) @@ -557,11 +572,15 @@ check_pair_jkeach <- function(jk_each, LD_temp[which(is.na(LD_temp))] <- 0 # LD_temp <- LD_temp[1, 2] } else if (!is.null(XtX)) { - if (length(XtX) == 1){ + if (identical(ref_label, "No_ref")) { LD_temp <- matrix(0, length(jk_each), length(jk_each)) } else { jk.remain <- match(jk_each, remain_jk) - LD_temp <- XtX[jk.remain, jk.remain] + if (identical(ref_label, "X_ref")) { + LD_temp <- suppressWarnings({ get_cormat(XtX[, jk.remain]) }) + } else { + LD_temp <- XtX[jk.remain, jk.remain] + } LD_temp[which(is.na(LD_temp))] <- 0 } } @@ -582,7 +601,8 @@ check_pair_jkeach <- function(jk_each, X = cb_data$data[[X_dict[idx]]]$X, XtX = cb_data$data[[X_dict[idx]]]$XtX, N = data_update[[idx]]$N, - remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss) + remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss), + ref_label = cb_data$data[[X_dict[idx]]]$ref_label ) }) @@ -612,7 +632,9 @@ estimate_change_profile_res <- function(jk, X = NULL, res = NULL, N = NULL, XtX = NULL, YtY = NULL, XtY = NULL, beta_k = NULL, - miss_idx = NULL) { + miss_idx = NULL, + ref_label = "LD", + XtX_beta_cache = NULL) { if (!is.null(X)) { rtr <- sum(res^2) / (N - 1) xtr <- t(X[, jk]) %*% res / (N - 1) @@ -629,10 +651,13 @@ estimate_change_profile_res <- function(jk, } else { xty <- XtY / scaling_factor } - if (length(xtx) == 1){ + if (identical(ref_label, "No_ref")) { rtr <- yty - 2 * sum(beta_k * xty) + sum(beta_k^2) + } else if (!is.null(XtX_beta_cache)) { + rtr <- yty - 2 * sum(beta_k * xty) + sum(XtX_beta_cache * beta_k) } else { - rtr <- yty - 2 * sum(beta_k * xty) + sum((xtx %*% as.matrix(beta_k)) * beta_k) + XtX_beta <- compute_XtX_product(xtx, beta_k, ref_label) + rtr <- yty - 2 * sum(beta_k * xty) + sum(XtX_beta * beta_k) } } numerator <- xtr^2 / (2 * rtr) diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R index ff9f312..6df5ab5 100644 --- a/R/colocboost_inference.R 
+++ b/R/colocboost_inference.R @@ -190,7 +190,7 @@ get_n_cluster <- function(hc, Sigma, m = ncol(Sigma), min_cluster_corr = 0.8) { #' @keywords cb_post_inference #' @noRd w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverage = 0.95, - min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL) { + min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL, ref_label = "LD") { tmp_purity <- apply(weights, 2, function(w) { pos <- w_cs(w, coverage = coverage) @@ -198,7 +198,7 @@ w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverag if (!is.null(Xcorr)) { pos <- match(pos, setdiff(1:length(w), miss_idx)) } - get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n) + get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n, ref_label = ref_label) }) if (is.null(median_abs_corr)) { is_pure <- which(tmp_purity[1, ] >= min_abs_corr) @@ -227,7 +227,8 @@ check_null_post <- function(cb_obj, } get_profile <- function(cs_beta, X = NULL, Y = NULL, N = NULL, - XtX = NULL, YtY = NULL, XtY = NULL, miss_idx, adj_dep = 1) { + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx, adj_dep = 1, + ref_label = "LD") { if (!is.null(X)) { mean((Y - X %*% as.matrix(cs_beta))^2) * N / (N - 1) } else if (!is.null(XtY)) { @@ -242,20 +243,23 @@ check_null_post <- function(cb_obj, } else { xty <- XtY / scaling_factor } - if (length(xtx) == 1){ + if (identical(ref_label, "No_ref")) { (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep } else { - (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep + XtX_beta <- compute_XtX_product(xtx, cs_beta, ref_label) + (yty - 2 * sum(cs_beta * xty) + sum(XtX_beta * cs_beta)) * adj_dep } } } get_cs_obj <- function(cs_beta, res, tau, func_simplex, lambda, adj_dep, LD_free, X = NULL, Y = NULL, N = NULL, - XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL) { + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, + ref_label = "LD") { correlation <- get_correlation( X = X, res = res, 
XtY = XtY, N = N, YtY = YtY, - XtX = XtX, beta_k = cs_beta, miss_idx = miss_idx + XtX = XtX, beta_k = cs_beta, miss_idx = miss_idx, + ref_label = ref_label ) z <- get_z(correlation, n = N, res) abs_cor <- abs(correlation) @@ -264,7 +268,7 @@ check_null_post <- function(cb_obj, P <- length(z) ld_jk <- get_LD_jk(jk, X = X, XtX = XtX, N = N, - remain_idx = setdiff(1:P, miss_idx), P = P + remain_idx = setdiff(1:P, miss_idx), P = P, ref_label = ref_label ) ld_feature <- sqrt(abs(ld_jk)) # - calculate delta @@ -284,27 +288,28 @@ check_null_post <- function(cb_obj, return(tau * matrixStats::logSumExp(exp_term / tau + log(delta))) } - update_res <- function(X = NULL, Y = NULL, XtX = NULL, XtY = NULL, N = NULL, cs_beta, miss_idx) { + update_res <- function(X = NULL, Y = NULL, XtX = NULL, XtY = NULL, N = NULL, cs_beta, miss_idx, ref_label = "LD") { if (!is.null(X)) { return(Y - X %*% cs_beta) } else if (!is.null(XtX)) { scaling.factor <- if (!is.null(N)) N - 1 else 1 beta_scaling <- if (!is.null(N)) 1 else 100 - xtx <- XtX / scaling.factor if (length(miss_idx) != 0) { xty <- XtY[-miss_idx] / scaling.factor res.tmp <- rep(0, length(XtY)) - if (length(xtx) == 1){ + if (identical(ref_label, "No_ref")) { res.tmp[-miss_idx] <- xty - cs_beta[-miss_idx] / beta_scaling } else { - res.tmp[-miss_idx] <- xty - xtx %*% (cs_beta[-miss_idx] / beta_scaling) + XtX_beta <- compute_XtX_product(XtX, cs_beta[-miss_idx] / beta_scaling, ref_label) + res.tmp[-miss_idx] <- xty - XtX_beta / scaling.factor } } else { xty <- XtY / scaling.factor - if (length(xtx) == 1){ + if (identical(ref_label, "No_ref")) { res.tmp <- xty - (cs_beta / beta_scaling) } else { - res.tmp <- xty - xtx %*% (cs_beta / beta_scaling) + XtX_beta <- compute_XtX_product(XtX, cs_beta / beta_scaling, ref_label) + res.tmp <- xty - XtX_beta / scaling.factor } } return(res.tmp) @@ -325,12 +330,14 @@ check_null_post <- function(cb_obj, cs_beta[cs_variants] <- 0 X_dict <- cb_data$dict[j] adj_dep <- cb_data$data[[j]]$dependency + 
ref_label_j <- cb_data$data[[X_dict]]$ref_label if (check_null_method == "profile") { cs_profile <- get_profile(cs_beta, X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[j]]$Y, XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[j]]$XtY, YtY = cb_data$data[[j]]$YtY, N = cb_data$data[[j]]$N, - miss_idx = cb_data$data[[j]]$variable_miss, adj_dep = adj_dep + miss_idx = cb_data$data[[j]]$variable_miss, adj_dep = adj_dep, + ref_label = ref_label_j ) last_profile <- extract_last(cb_obj$cb_model[[j]]$profile_loglike_each) change <- abs(cs_profile - last_profile) @@ -348,7 +355,8 @@ check_null_post <- function(cb_obj, X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[j]]$Y, XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[j]]$XtY, N = cb_data$data[[j]]$N, cs_beta, - miss_idx = cb_data$data[[j]]$variable_miss + miss_idx = cb_data$data[[j]]$variable_miss, + ref_label = ref_label_j ) cs_obj <- get_cs_obj(cs_beta, res, cb_obj$cb_model_para$tau, cb_obj$cb_model_para$func_simplex, cb_obj$cb_model_para$lambda, @@ -357,7 +365,8 @@ check_null_post <- function(cb_obj, X = cb_data$data[[X_dict]]$X, N = cb_data$data[[j]]$N, XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[X_dict]]$XtY, YtY = cb_data$data[[X_dict]]$YtY, - miss_idx = cb_data$data[[j]]$variable_miss + miss_idx = cb_data$data[[j]]$variable_miss, + ref_label = ref_label_j ) last_obj <- min(cb_obj$cb_model[[j]]$obj_path) change <- abs(cs_obj - last_obj) @@ -401,7 +410,7 @@ check_null_post <- function(cb_obj, #' @keywords cb_post_inference #' @noRd #' @importFrom stats na.omit -get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) { +get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100, ref_label = "LD") { get_upper_tri <- Rfast::upper_tri get_median <- Rfast::med @@ -420,16 +429,17 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) { if (is.null(Xcorr)) { X_sub <- X[, pos] X_sub <- as.matrix(X_sub) - corr <- suppressWarnings({ - get_cormat(X_sub) - }) + corr 
<- suppressWarnings({ get_cormat(X_sub) }) corr[which(is.na(corr))] <- 0 value <- abs(get_upper_tri(corr)) } else { - if (length(Xcorr) == 1){ + if (identical(ref_label, "No_ref") || length(Xcorr) == 1) { value <- 0 + } else if (identical(ref_label, "X_ref")) { + corr <- suppressWarnings({ get_cormat(Xcorr[, pos]) }) + corr[which(is.na(corr))] <- 0 + value <- abs(get_upper_tri(corr)) } else { - Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr value <- abs(get_upper_tri(Xcorr[pos, pos])) } } @@ -445,7 +455,7 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) { #' @keywords cb_post_inference #' @noRd #' @importFrom stats na.omit -get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, miss_idx = NULL, P = NULL) { +get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, miss_idx = NULL, P = NULL, ref_label = "LD") { get_matrix_mult <- function(X_sub1, X_sub2) { X_sub1 <- t(X_sub1) X_sub2 <- t(X_sub2) @@ -465,15 +475,21 @@ get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, miss_idx = NU X_sub2 <- scale(X[, pos2, drop = FALSE], center = T, scale = F) value <- abs(get_matrix_mult(X_sub1, X_sub2)) } else { - if (sum(Xcorr)==1){ + if (identical(ref_label, "No_ref") || sum(Xcorr) == 1) { value <- 0 } else { - if (length(miss_idx)!=0){ + if (length(miss_idx) != 0) { pos1 <- na.omit(match(pos1, setdiff(1:P, miss_idx))) pos2 <- na.omit(match(pos2, setdiff(1:P, miss_idx))) } if (length(pos1) != 0 & length(pos2) != 0) { - value <- abs(Xcorr[pos1, pos2]) + if (identical(ref_label, "X_ref")) { + X_sub1 <- scale(Xcorr[, pos1, drop = FALSE], center = T, scale = F) + X_sub2 <- scale(Xcorr[, pos2, drop = FALSE], center = T, scale = F) + value <- abs(get_matrix_mult(X_sub1, X_sub2)) + } else { + value <- abs(Xcorr[pos1, pos2]) + } } else { value <- 0 } @@ -498,7 +514,8 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { } get_cos_profile <- function(cs_beta, outcome_idx, X = NULL, Y = NULL, N = NULL, - XtX 
= NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, adj_dep = 1) { + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, adj_dep = 1, + ref_label = "LD") { if (!is.null(X)) { cos_profile <- mean((Y - X %*% as.matrix(cs_beta))^2) * N / (N - 1) yty <- var(Y) @@ -514,10 +531,11 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { } else { xty <- XtY / scaling_factor } - if (length(xtx) == 1){ + if (identical(ref_label, "No_ref")) { cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep } else { - cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep + XtX_beta <- compute_XtX_product(xtx, cs_beta, ref_label) + cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(XtX_beta * cs_beta)) * adj_dep } } delta <- yty - cos_profile @@ -543,7 +561,8 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[outcome_idx]]$XtY, YtY = cb_data$data[[outcome_idx]]$YtY, N = cb_data$data[[outcome_idx]]$N, miss_idx = cb_data$data[[outcome_idx]]$variable_miss, - adj_dep = cb_data$data[[outcome_idx]]$dependency + adj_dep = cb_data$data[[outcome_idx]]$dependency, + ref_label = cb_data$data[[X_dict]]$ref_label ) max_profile <- max(cb_obj$cb_model[[outcome_idx]]$profile_loglike_each) ifelse(max_profile < cos_profile, 0, max_profile - cos_profile) @@ -612,7 +631,8 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { get_ucos_evidence <- function(cb_obj, ucoloc_info) { get_ucos_profile <- function(cs_beta, outcome_idx, X = NULL, Y = NULL, N = NULL, - XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, adj_dep = 1) { + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, adj_dep = 1, + ref_label = "LD") { if (!is.null(X)) { cos_profile <- mean((Y - X %*% as.matrix(cs_beta))^2) * N / (N - 1) yty <- var(Y) @@ -628,10 +648,11 @@ get_ucos_evidence <- function(cb_obj, ucoloc_info) { } else { xty <- XtY / scaling_factor } - if (length(xtx) == 1){ + if 
(identical(ref_label, "No_ref")) { cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep } else { - cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep + XtX_beta <- compute_XtX_product(xtx, cs_beta, ref_label) + cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(XtX_beta * cs_beta)) * adj_dep } } delta <- yty - cos_profile @@ -657,7 +678,8 @@ get_ucos_evidence <- function(cb_obj, ucoloc_info) { XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[outcome_idx]]$XtY, YtY = cb_data$data[[outcome_idx]]$YtY, N = cb_data$data[[outcome_idx]]$N, miss_idx = cb_data$data[[outcome_idx]]$variable_miss, - adj_dep = cb_data$data[[outcome_idx]]$dependency + adj_dep = cb_data$data[[outcome_idx]]$dependency, + ref_label = cb_data$data[[X_dict]]$ref_label ) max_profile <- max(cb_obj$cb_model[[outcome_idx]]$profile_loglike_each) ifelse(max_profile < cos_profile, 0, max_profile - cos_profile) diff --git a/R/colocboost_init.R b/R/colocboost_init.R index 4fa8815..7385dd9 100644 --- a/R/colocboost_init.R +++ b/R/colocboost_init.R @@ -28,7 +28,8 @@ colocboost_inits <- function() { #' @noRd #' @keywords cb_objects colocboost_init_data <- function(X, Y, dict_YX, - Z, LD, N_sumstat, dict_sumstatLD, + Z, LD, X_ref, ref_label, + N_sumstat, dict_sumstatLD, Var_y, SeBhat, keep_variables, focal_outcome_idx = NULL, @@ -98,6 +99,7 @@ colocboost_init_data <- function(X, Y, dict_YX, ) for (i in 1:length(Y)) { cb_data$data[[flag]] <- ind_formated$result[[i]] + cb_data$data[[flag]]$ref_label <- "individual" names(cb_data$data)[flag] <- paste0("ind_outcome_", i) flag <- flag + 1 } @@ -105,17 +107,20 @@ colocboost_init_data <- function(X, Y, dict_YX, } n_ind <- flag - 1 # if summary: XtX XtY, YtY - if (!is.null(Z) & !is.null(LD)) { + if (!is.null(Z)) { ####################### need to consider more ######################### # ------ only code up one sumstat variant_lists <- keep_variables[c((n_ind_variable+1):length(keep_variables))] 
sumstat_formated <- process_sumstat( - Z, N_sumstat, Var_y, SeBhat, LD, + Z, N_sumstat, Var_y, SeBhat, + ld_matrices = if (ref_label == "X_ref") X_ref else LD, variant_lists, dict_sumstatLD, - keep_variable_names + keep_variable_names, + ref_label = ref_label ) for (i in 1:length(Z)) { cb_data$data[[flag]] <- sumstat_formated$results[[i]] + cb_data$data[[flag]]$ref_label <- ref_label names(cb_data$data)[flag] <- paste0("sumstat_outcome_", i) flag <- flag + 1 } @@ -196,7 +201,8 @@ colocboost_init_model <- function(cb_data, N = data_each$N, YtY = data_each$YtY, XtX = cb_data$data[[X_dict]]$XtX, beta_k = tmp$beta, - miss_idx = data_each$variable_miss + miss_idx = data_each$variable_miss, + ref_label = cb_data$data[[X_dict]]$ref_label ) # - initial z-score between X and residual based on correlation tmp$z <- get_z(tmp$correlation, n = data_each$N, tmp$res) @@ -378,7 +384,8 @@ inital_residual <- function(Y = NULL, XtY = NULL) { # - Calculate correlation between X and res get_correlation <- function(X = NULL, res = NULL, XtY = NULL, N = NULL, YtY = NULL, XtX = NULL, beta_k = NULL, miss_idx = NULL, - XtX_beta_cache = NULL) { + XtX_beta_cache = NULL, + ref_label = "LD") { if (!is.null(X)) { corr <- suppressWarnings({ Rfast::correls(res, X)[, "correlation"] @@ -400,12 +407,13 @@ get_correlation <- function(X = NULL, res = NULL, XtY = NULL, N = NULL, Xtr <- res / scaling_factor XtY <- XtY / scaling_factor } - if (length(XtX) == 1){ + if (identical(ref_label, "No_ref")) { var_r <- YtY - 2 * sum(beta_k * XtY) + sum(beta_k^2) } else if (!is.null(XtX_beta_cache)) { var_r <- YtY - 2 * sum(beta_k * XtY) + sum(XtX_beta_cache * beta_k) } else { - var_r <- YtY - 2 * sum(beta_k * XtY) + sum((XtX %*% as.matrix(beta_k)) * beta_k) + XtX_beta_val <- compute_XtX_product(XtX, beta_k, ref_label) + var_r <- YtY - 2 * sum(beta_k * XtY) + sum(XtX_beta_val * beta_k) } if (var_r > 1e-6) { corr_nomiss <- Xtr / sqrt(var_r) @@ -561,7 +569,10 @@ get_multiple_testing_correction <- function(z, miss_idx 
= NULL, func_multi_test #' #' @return List containing processed data with optimized LD submatrix storage #' @noRd -process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dict, target_variants) { +process_sumstat <- function(Z, N, Var_y, SeBhat, + ld_matrices, variant_lists, dict, + target_variants, + ref_label = "LD") { # Step 1: Identify unique combinations of (variant list, LD matrix) @@ -582,7 +593,6 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic break } } - if (!is_duplicate) { # If not a duplicate, assign its exact index unified_dict[i] <- i @@ -606,14 +616,12 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic current_variants <- variant_lists[[i]] current_z <- Z[[i]] current_n <- N[[i]] - # Get corresponding LD matrix from original dictionary mapping ld_index <- dict[i] - current_ld_matrix <- ld_matrices[[ld_index]] + current_ref <- ld_matrices[[ld_index]] # Find common variants between current list and target variants common_variants <- intersect(current_variants, target_variants) - # Find variants in target but not in current list missing_variants <- setdiff(target_variants, current_variants) tmp$variable_miss <- which(target_variants %in% missing_variants) @@ -625,51 +633,76 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic Z_extend[pos_target] <- current_z[pos_z] # Calculate submatrix for each unique entry (not duplicates) - if (length(current_ld_matrix) == 1){ - ld_submatrix <- current_ld_matrix + ref_submatrix <- NULL + + if (ref_label == "No_ref"){ + + ref_submatrix <- current_ref + + } else if (ref_label == "X_ref") { + + # match by column names only (rows = samples) + if (length(common_variants) > 0) { + if (i == unified_dict[i]) { + col_idx <- match(common_variants, colnames(current_ref)) + col_idx <- col_idx[!is.na(col_idx)] + if (length(col_idx) > 0) { + ref_submatrix <- current_ref[, col_idx, drop = FALSE] + 
colnames(ref_submatrix) <- common_variants + } + } + } + } else { - ld_submatrix <- NULL + + # ref_label == "LD": match by both row and column names if (length(common_variants) > 0) { - # Only include the submatrix if this entry is unique or is the first occurrence if (i == unified_dict[i]) { - # Check if common_variants and rownames have identical order - if (identical(common_variants, rownames(current_ld_matrix))) { - # If order is identical, use the matrix directly without reordering - ld_submatrix <- current_ld_matrix + if (identical(common_variants, rownames(current_ref))) { + # Order is identical, use matrix directly without reordering + ref_submatrix <- current_ref } else { - # If order is different, reorder using matched indices - matched_indices <- match(common_variants, rownames(current_ld_matrix)) - ld_submatrix <- current_ld_matrix[matched_indices, matched_indices, drop = FALSE] - rownames(ld_submatrix) <- common_variants - colnames(ld_submatrix) <- common_variants + # Reorder to match common_variants order + matched_indices <- match(common_variants, rownames(current_ref)) + ref_submatrix <- current_ref[matched_indices, matched_indices, drop = FALSE] + rownames(ref_submatrix) <- common_variants + colnames(ref_submatrix) <- common_variants } } } + } # Organize data if (is.null(current_n)) { - tmp$XtX <- ld_submatrix + tmp$XtX <- ref_submatrix tmp$XtY <- Z_extend tmp$YtY <- 1 } else { if (!is.null(SeBhat[[i]]) & !is.null(Var_y[[i]])) { + # var_y, shat (and bhat) are provided, so the effects are on the # *original scale*. 
adj <- 1 / (Z_extend^2 + current_n - 2) - if (!is.null(ld_submatrix)) { + tmp$YtY <- (current_n - 1) * Var_y[[i]] + tmp$XtY <- Z_extend * sqrt(adj) * Var_y[[i]] / SeBhat[[i]] + if (ref_label == "X_ref") { + # Store X_ref slice directly; XtX computed on-the-fly downstream + tmp$XtX <- ref_submatrix + } else { + # LD or No_ref: scale to original scale XtXdiag <- Var_y[[i]] * adj / (SeBhat[[i]]^2) - xtx <- t(ld_submatrix * sqrt(XtXdiag)) * sqrt(XtXdiag) + xtx <- t(ref_submatrix * sqrt(XtXdiag)) * sqrt(XtXdiag) tmp$XtX <- (xtx + t(xtx)) / 2 } - tmp$YtY <- (current_n - 1) * Var_y[[i]] - tmp$XtY <- Z_extend * sqrt(adj) * Var_y[[i]] / SeBhat[[i]] + } else { - if (!is.null(ld_submatrix)) { - tmp$XtX <- ld_submatrix - } + # Standardised scale tmp$YtY <- (current_n - 1) tmp$XtY <- sqrt(current_n - 1) * Z_extend + if (!is.null(ref_submatrix)) { + tmp$XtX <- ref_submatrix # LD, X_ref, or scalar 1 stored as-is + } } } diff --git a/R/colocboost_update.R b/R/colocboost_update.R index 0e50aba..52efd62 100644 --- a/R/colocboost_update.R +++ b/R/colocboost_update.R @@ -32,7 +32,8 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { XtX = cb_data$data[[X_dict]]$XtX, N = cb_data$data[[i]]$N, remain_idx = setdiff(1:cb_model_para$P, cb_data$data[[i]]$variable_miss), - P = cb_model_para$P + P = cb_model_para$P, + ref_label = cb_data$data[[X_dict]]$ref_label ) cb_model[[i]]$ld_jk <- rbind(cb_model[[i]]$ld_jk, ld_jk) } @@ -107,39 +108,27 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { beta_scaling <- cb_model[[i]]$beta_scaling # - summary statistics xtx <- cb_data$data[[X_dict]]$XtX + ref_label_i <- cb_data$data[[X_dict]]$ref_label cb_model[[i]]$res <- rep(0, cb_model_para$P) if (length(cb_data$data[[i]]$variable_miss) != 0) { beta <- cb_model[[i]]$beta[-cb_data$data[[i]]$variable_miss] / beta_scaling xty <- cb_data$data[[i]]$XtY[-cb_data$data[[i]]$variable_miss] - if (length(xtx) == 1){ - XtX_beta <- beta - 
cb_model[[i]]$res[-cb_data$data[[i]]$variable_miss] <- xty - scaling_factor * beta - } else { - XtX_beta <- xtx %*% beta - cb_model[[i]]$res[-cb_data$data[[i]]$variable_miss] <- xty - scaling_factor * XtX_beta - } + XtX_beta <- compute_XtX_product(xtx, beta, ref_label_i) + cb_model[[i]]$res[-cb_data$data[[i]]$variable_miss] <- xty - scaling_factor * XtX_beta } else { beta <- cb_model[[i]]$beta / beta_scaling xty <- cb_data$data[[i]]$XtY - if (length(xtx) == 1){ - XtX_beta <- beta - cb_model[[i]]$res <- xty - scaling_factor * beta - } else { - XtX_beta <- xtx %*% beta - cb_model[[i]]$res <- xty - scaling_factor * XtX_beta - } + XtX_beta <- compute_XtX_product(xtx, beta, ref_label_i) + cb_model[[i]]$res <- xty - scaling_factor * XtX_beta } # - cache XtX %*% beta for reuse in get_correlation (avoids redundant O(P^2) computation) cb_model[[i]]$XtX_beta_cache <- XtX_beta # - profile-loglikelihood (reuses cached XtX_beta) yty <- cb_data$data[[i]]$YtY / scaling_factor xty <- xty / scaling_factor - if (length(xtx) == 1){ - profile_log <- (yty - 2 * sum(beta * xty) + sum(beta^2)) * adj_dep - } else { - profile_log <- (yty - 2 * sum(beta * xty) + sum(XtX_beta * beta)) * adj_dep - } + profile_log <- (yty - 2 * sum(beta * xty) + sum(XtX_beta * beta)) * adj_dep + } cb_model[[i]]$profile_loglike_each <- c(cb_model[[i]]$profile_loglike_each, profile_log) } @@ -149,7 +138,7 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { # - calculate LD for update_jk -get_LD_jk <- function(jk1, X = NULL, XtX = NULL, N = NULL, remain_idx = NULL, P = NULL) { +get_LD_jk <- function(jk1, X = NULL, XtX = NULL, N = NULL, remain_idx = NULL, P = NULL, ref_label = "LD") { if (!is.null(X)) { corr <- suppressWarnings({ Rfast::correls(X[, jk1], X)[, "correlation"] @@ -158,8 +147,14 @@ get_LD_jk <- function(jk1, X = NULL, XtX = NULL, N = NULL, remain_idx = NULL, P } else if (!is.null(XtX)) { jk1.remain <- which(remain_idx == jk1) corr <- rep(0, P) - if (length(XtX) == 1 | 
length(jk1.remain)==0){ + if (identical(ref_label, "No_ref") | length(jk1.remain) == 0) { corr[remain_idx] <- 1 + } else if (identical(ref_label, "X_ref")) { + corr_common <- suppressWarnings({ + correls(XtX[, jk1.remain], XtX)[, "correlation"] + }) + corr_common[which(is.na(corr_common))] <- 0 + corr[remain_idx] <- corr_common } else { corr[remain_idx] <- XtX[, jk1.remain] } @@ -262,7 +257,8 @@ boost_obj_last <- function(cb_data, cb_model, cb_model_para) { XtX = cb_data$data[[X_dict]]$XtX, N = cb_data$data[[i]]$N, remain_idx = setdiff(1:cb_model_para$P, cb_data$data[[i]]$variable_miss), - P = cb_model_para$P + P = cb_model_para$P, + ref_label = cb_data$data[[X_dict]]$ref_label ) ld_feature <- sqrt(abs(ld_jk)) diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index b486bf8..4c75b1a 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -43,7 +43,8 @@ merge_cos_ucos <- function(cb_obj, out_cos, out_ucos, coverage = 0.95, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[fine_outcome]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) # is.between <- length(intersect(cset1, cset2)) != 0 is.between <- (abs(res[2] - 1) < tol) @@ -66,7 +67,8 @@ merge_cos_ucos <- function(cb_obj, out_cos, out_ucos, coverage = 0.95, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) } res <- Reduce(pmax, res) @@ -151,7 +153,8 @@ merge_ucos <- function(cb_obj, past_out, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) flag <- flag + 1 } @@ -221,7 
+224,8 @@ merge_ucos <- function(cb_obj, past_out, tmp <- matrix(get_purity(pos, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[i3]]$N, n = n_purity + N = cb_obj$cb_data$data[[i3]]$N, n = n_purity, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ), 1, 3) p_tmp <- rbind(p_tmp, tmp) } @@ -689,7 +693,8 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { tmp <- matrix(get_purity(pos, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[i3]]$N, n = cb_obj$cb_model_para$n_purity + N = cb_obj$cb_data$data[[i3]]$N, n = cb_obj$cb_model_para$n_purity, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ), 1, 3) p_tmp <- rbind(p_tmp, tmp) } @@ -763,7 +768,8 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) flag <- flag + 1 } @@ -1025,7 +1031,8 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) flag <- flag + 1 } @@ -1069,7 +1076,8 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) flag <- flag + 1 } @@ -1124,3 +1132,17 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output return(ll) } + +#' Compute 
XtX %*% beta, dispatching on ref_label +#' @noRd +compute_XtX_product <- function(XtX, beta, ref_label = "LD") { + if (identical(ref_label, "No_ref")) return(beta) + if (identical(ref_label, "X_ref")) { + N_ref <- nrow(XtX) + temp <- XtX %*% as.matrix(beta) + return(as.vector(crossprod(XtX, temp)) / (N_ref - 1)) + } + as.vector(XtX %*% as.matrix(beta)) +} + + diff --git a/R/colocboost_workhorse.R b/R/colocboost_workhorse.R index 2635e00..2f0a4f7 100644 --- a/R/colocboost_workhorse.R +++ b/R/colocboost_workhorse.R @@ -284,7 +284,8 @@ cb_model_update <- function(cb_data, cb_model, cb_model_para) { XtX = cb_data$data[[X_dict]]$XtX, beta_k = model_each$beta, miss_idx = data_each$variable_miss, - XtX_beta_cache = model_each$XtX_beta_cache + XtX_beta_cache = model_each$XtX_beta_cache, + ref_label = cb_data$data[[X_dict]]$ref_label ) cb_model[[i]]$correlation <- tmp cb_model[[i]]$z <- get_z(tmp, n = data_each$N, model_each$res) diff --git a/man/colocboost.Rd b/man/colocboost.Rd index 4f6c3df..7e1b0a0 100644 --- a/man/colocboost.Rd +++ b/man/colocboost.Rd @@ -12,6 +12,7 @@ colocboost( Y = NULL, sumstat = NULL, LD = NULL, + X_ref = NULL, dict_YX = NULL, dict_sumstatLD = NULL, outcome_names = NULL, @@ -81,14 +82,23 @@ The columns of data.frame should include either \code{z} or \code{beta}/\code{se \code{var_y} is the variance of phenotype (default is 1 meaning that the Y is in the \dQuote{standardized} scale).} \item{LD}{A list of correlation matrix indicating the LD matrix for each genotype. It also could be a single matrix if all sumstats were -obtained from the same genotypes.} +obtained from the same genotypes. Provide either \code{LD} or \code{X_ref}, not both. +If neither is provided, LD-free mode is used.} + +\item{X_ref}{A reference panel genotype matrix (N_ref x P) or a list of matrices, as an alternative to providing a precomputed +\code{LD} matrix. Column names must include variant names matching those in \code{sumstat}. 
+When the number of reference panel samples is less than the number of variants (N_ref < P), +this avoids storing the full P x P LD matrix and reduces memory usage. +When N_ref >= P, LD is precomputed from \code{X_ref} internally. +Provide either \code{LD} or \code{X_ref}, not both. If neither is provided, LD-free mode is used.} \item{dict_YX}{A L by 2 matrix of dictionary for \code{X} and \code{Y} if there exist subsets of outcomes corresponding to the same X matrix. The first column should be 1:L for L outcomes. The second column should be the index of \code{X} corresponding to the outcome. The innovation: do not provide the same matrix in \code{X} to reduce the computational burden.} -\item{dict_sumstatLD}{A L by 2 matrix of dictionary for \code{sumstat} and \code{LD} if there exist subsets of outcomes corresponding to the same sumstat. -The first column should be 1:L for L sumstat The second column should be the index of \code{LD} corresponding to the sumstat. +\item{dict_sumstatLD}{A L by 2 matrix of dictionary for \code{sumstat} and \code{LD} (or \code{X_ref}) if there exist subsets of outcomes +corresponding to the same sumstat. +The first column should be 1:L for L sumstat The second column should be the index of \code{LD} (or \code{X_ref}) corresponding to the sumstat. 
The innovation: do not provide the same matrix in \code{LD} to reduce the computational burden.} \item{outcome_names}{The names of outcomes, which has the same order for Y.} diff --git a/man/colocboost_validate_input_data.Rd b/man/colocboost_validate_input_data.Rd index d9c45f4..ec7e40a 100644 --- a/man/colocboost_validate_input_data.Rd +++ b/man/colocboost_validate_input_data.Rd @@ -9,6 +9,7 @@ colocboost_validate_input_data( Y = NULL, sumstat = NULL, LD = NULL, + X_ref = NULL, dict_YX = NULL, dict_sumstatLD = NULL, effect_est = NULL, @@ -31,7 +32,16 @@ colocboost_validate_input_data( \item{sumstat}{A list of data.frames of summary statistics.} -\item{LD}{A list of correlation matrices indicating the LD matrix for each genotype.} +\item{LD}{A list of correlation matrix indicating the LD matrix for each genotype. It also could be a single matrix if all sumstats were +obtained from the same genotypes. Provide either \code{LD} or \code{X_ref}, not both. +If neither is provided, LD-free mode is used.} + +\item{X_ref}{A reference panel genotype matrix (N_ref x P) or a list of matrices, as an alternative to providing a precomputed +\code{LD} matrix. Column names must include variant names matching those in \code{sumstat}. +When the number of reference panel samples is less than the number of variants (N_ref < P), +this avoids storing the full P x P LD matrix and reduces memory usage. +When N_ref >= P, LD is precomputed from \code{X_ref} internally. +Provide either \code{LD} or \code{X_ref}, not both. 
If neither is provided, LD-free mode is used.} \item{dict_YX}{A L by 2 matrix of dictionary for X and Y if there exist subsets of outcomes corresponding to the same X matrix.} @@ -57,6 +67,8 @@ A list containing: \item{keep_variable_individual}{List of variable names for each X matrix} \item{sumstat}{Processed list of summary statistics data.frames} \item{LD}{Processed list of LD matrices} +\item{X_ref}{Processed list of reference genotype matrices} +\item{ref_label}{Style of reference matrices} \item{sumstatLD_dict}{Dictionary mapping sumstat to LD} \item{keep_variable_sumstat}{List of variant names for each sumstat} \item{Z}{List of z-scores for each outcome} diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf new file mode 100644 index 0000000..55da3dc Binary files /dev/null and b/tests/testthat/Rplots.pdf differ diff --git a/tests/testthat/test_Xref.R b/tests/testthat/test_Xref.R new file mode 100644 index 0000000..5d30abc --- /dev/null +++ b/tests/testthat/test_Xref.R @@ -0,0 +1,758 @@ +library(testthat) + +# ============================================================================ +# Shared test data generators for X_ref tests +# ============================================================================ + +generate_xref_test_data <- function(n = 200, n_ref = 50, p = 30, L = 2, seed = 42) { + set.seed(seed) + + # Generate X with LD structure (for individual-level truth) + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Generate X_ref (reference panel, separate samples) + X_ref <- MASS::mvrnorm(n_ref, rep(0, p), sigma) + colnames(X_ref) <- paste0("SNP", 1:p) + + # Generate true effects + true_beta <- matrix(0, p, L) + true_beta[5, 1] <- 0.7 # SNP5 affects trait 1 + true_beta[5, 2] <- 0.6 # SNP5 also affects trait 2 (colocalized) + true_beta[20, 2] <- 0.5 # SNP20 only affects trait 2 + + # Generate Y with 
some noise + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + # Calculate summary statistics + beta <- matrix(0, p, L) + se <- matrix(0, p, L) + for (i in 1:L) { + for (j in 1:p) { + fit <- summary(lm(Y[, i] ~ X[, j]))$coef + if (nrow(fit) == 2) { + beta[j, i] <- fit[2, 1] + se[j, i] <- fit[2, 2] + } + } + } + + sumstat_list <- lapply(1:L, function(i) { + data.frame( + beta = beta[, i], + sebeta = se[, i], + n = n, + variant = colnames(X) + ) + }) + + LD <- cor(X) + + list( + X = X, + Y = Y, + X_ref = X_ref, + LD = LD, + sumstat = sumstat_list, + true_beta = true_beta + ) +} + + +# ============================================================================ +# Test 1: colocboost rejects LD + X_ref together +# ============================================================================ + +test_that("colocboost rejects when both LD and X_ref are provided", { + test_data <- generate_xref_test_data() + + expect_warning( + result <- colocboost( + sumstat = test_data$sumstat, + LD = test_data$LD, + X_ref = test_data$X_ref, + M = 1 + ), + "Provide either LD or X_ref, not both" + ) + expect_null(result) +}) + + +# ============================================================================ +# Test 2: X_ref with N_ref >= P precomputes LD +# ============================================================================ + +test_that("X_ref with N_ref >= P precomputes LD and produces valid results", { + # N_ref = 50, P = 30 => N_ref >= P, should precompute LD + test_data <- generate_xref_test_data(n_ref = 50, p = 30) + + expect_message( + suppressWarnings({ + result_xref <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + }), + "N_ref >= P: precomputing LD from X_ref" + ) + + expect_s3_class(result_xref, "colocboost") + expect_equal(result_xref$data_info$n_outcomes, 2) + expect_equal(length(result_xref$data_info$variables), 30) +}) + + +# 
============================================================================ +# Test 3: X_ref with N_ref < P keeps X_ref for on-the-fly computation +# ============================================================================ + +test_that("X_ref with N_ref < P runs correctly without precomputing LD", { + # N_ref = 20, P = 30 => N_ref < P, should keep X_ref + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result_xref <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + expect_s3_class(result_xref, "colocboost") + expect_equal(result_xref$data_info$n_outcomes, 2) + expect_equal(length(result_xref$data_info$variables), 30) +}) + + +# ============================================================================ +# Test 4: X_ref results are consistent with LD results (N_ref >= P case) +# ============================================================================ + +test_that("X_ref (N_ref >= P) produces results consistent with precomputed LD", { + test_data <- generate_xref_test_data(n = 200, n_ref = 200, p = 20, seed = 99) + + # Run with precomputed LD + suppressWarnings(suppressMessages({ + result_ld <- colocboost( + sumstat = test_data$sumstat, + LD = test_data$LD, + M = 50, + output_level = 2 + ) + })) + + # Run with X_ref (same samples as LD source, so N_ref >= P => precomputes LD) + suppressWarnings(suppressMessages({ + result_xref <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 50, + output_level = 2 + ) + })) + + # Both should produce valid colocboost objects + expect_s3_class(result_ld, "colocboost") + expect_s3_class(result_xref, "colocboost") + + # Same number of outcomes and variables + expect_equal(result_ld$data_info$n_outcomes, result_xref$data_info$n_outcomes) + expect_equal(length(result_ld$data_info$variables), length(result_xref$data_info$variables)) +}) + + +# 
============================================================================ +# Test 5: X_ref with N_ref < P and one iteration (one_causal mode) +# ============================================================================ + +test_that("X_ref with N_ref < P works in one-causal mode (M=1 with LD)", { + test_data <- generate_xref_test_data(n_ref = 15, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 1, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + expect_equal(result$model_info$model_coveraged, "one_causal") + expect_equal(result$model_info$n_updates, 1) +}) + + +# ============================================================================ +# Test 6: X_ref with 3 outcomes +# ============================================================================ + +test_that("X_ref works with 3 outcomes", { + set.seed(123) + n <- 200 + n_ref <- 25 + p <- 25 + L <- 3 + + sigma <- 0.9^abs(outer(1:p, 1:p, "-")) + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + X_ref <- MASS::mvrnorm(n_ref, rep(0, p), sigma) + colnames(X_ref) <- paste0("SNP", 1:p) + + true_beta <- matrix(0, p, L) + true_beta[5, 1] <- 0.6 + true_beta[5, 2] <- 0.5 + true_beta[15, 3] <- 0.7 + + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + beta <- se <- matrix(0, p, L) + for (i in 1:L) { + for (j in 1:p) { + fit <- summary(lm(Y[, i] ~ X[, j]))$coef + if (nrow(fit) == 2) { beta[j, i] <- fit[2, 1]; se[j, i] <- fit[2, 2] } + } + } + sumstat <- lapply(1:L, function(i) data.frame(beta = beta[, i], sebeta = se[, i], n = n, variant = colnames(X))) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = sumstat, + X_ref = X_ref, + M = 10, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + expect_equal(result$data_info$n_outcomes, 3) +}) + + +# 
============================================================================ +# Test 7: X_ref with focal outcome +# ============================================================================ + +test_that("X_ref works with focal outcome", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + focal_outcome_idx = 1, + M = 10, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + expect_equal(result$data_info$outcome_info$is_focal[1], TRUE) + expect_equal(result$data_info$outcome_info$is_focal[2], FALSE) +}) + + +# ============================================================================ +# Test 8: X_ref with dict_sumstatLD mapping +# ============================================================================ + +test_that("X_ref works with dict_sumstatLD mapping for shared reference panel", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + # Both sumstats share the same X_ref + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = list(test_data$X_ref), + dict_sumstatLD = matrix(c(1, 1, 2, 1), ncol = 2), + M = 10, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + expect_equal(result$data_info$n_outcomes, 2) +}) + + +# ============================================================================ +# Test 9: X_ref with multiple reference panels +# ============================================================================ + +test_that("X_ref works with multiple reference panels", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + # Two separate X_ref panels (identical for testing) + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = list(test_data$X_ref, test_data$X_ref), + M = 10, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + 
expect_equal(result$data_info$n_outcomes, 2) +}) + + +# ============================================================================ +# Test 10: X_ref with missing variants (partial overlap) +# ============================================================================ + +test_that("X_ref handles partial variant overlap with sumstat", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + # X_ref has fewer variants than sumstat + X_ref_partial <- test_data$X_ref[, 1:25] # Only first 25 of 30 SNPs + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = X_ref_partial, + M = 10, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + # Should have 25 variables (intersection) + expect_equal(length(result$data_info$variables), 25) +}) + + +# ============================================================================ +# Test 11: compute_XtX_product correctness +# ============================================================================ + +test_that("compute_XtX_product produces correct results for all ref_label types", { + # Access internal function + compute_XtX_product <- get("compute_XtX_product", envir = asNamespace("colocboost")) + + set.seed(42) + n_ref <- 50 + p <- 20 + + sigma <- 0.9^abs(outer(1:p, 1:p, "-")) + X_ref_raw <- MASS::mvrnorm(n_ref, rep(0, p), sigma) + + # Standardize X_ref (as done in colocboost_validate_input_data) + X_ref <- Rfast::standardise(X_ref_raw, center = TRUE, scale = TRUE) + X_ref[is.na(X_ref)] <- 0 + + # Precompute LD from X_ref + LD <- get_cormat(X_ref_raw) + + beta <- rnorm(p) + + # Test LD case + result_ld <- compute_XtX_product(LD, beta, ref_label = "LD") + expected_ld <- as.vector(LD %*% beta) + expect_equal(result_ld, expected_ld) + + # Test X_ref case + result_xref <- compute_XtX_product(X_ref, beta, ref_label = "X_ref") + # Should approximate LD %*% beta + # X_ref' * X_ref / (N_ref - 1) ≈ LD + expected_xref <- as.vector(crossprod(X_ref, X_ref %*% 
beta) / (n_ref - 1)) + expect_equal(result_xref, expected_xref) + + # Test No_ref case + result_noref <- compute_XtX_product(1, beta, ref_label = "No_ref") + expect_equal(result_noref, beta) + + # X_ref and LD results should be similar (not exact due to finite N_ref) + expect_equal(result_xref, expected_ld, tolerance = 0.3) +}) + + +# ============================================================================ +# Test 12: X_ref validation in colocboost_validate_input_data +# ============================================================================ + +test_that("colocboost_validate_input_data correctly handles X_ref", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + # Test N_ref < P case + validated <- colocboost_validate_input_data( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref + ) + + expect_equal(validated$ref_label, "X_ref") + expect_true(!is.null(validated$X_ref)) + expect_null(validated$LD) + + # Test N_ref >= P case + test_data_large <- generate_xref_test_data(n_ref = 50, p = 30) + + validated_large <- suppressMessages( + colocboost_validate_input_data( + sumstat = test_data_large$sumstat, + X_ref = test_data_large$X_ref + ) + ) + + expect_equal(validated_large$ref_label, "LD") + expect_null(validated_large$X_ref) + expect_true(!is.null(validated_large$LD)) +}) + + +# ============================================================================ +# Test 13: X_ref with output_level 3 (diagnostic details) +# ============================================================================ + +test_that("X_ref works with output_level 3 and has correct ref_label in cb_data", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 3 + ) + })) + + expect_s3_class(result, "colocboost") + expect_true("diagnostic_details" %in% names(result)) + + # Check that ref_label is correctly stored in the 
model's cb_data + cb_data <- result$diagnostic_details$cb_data + for (i in seq_along(cb_data$data)) { + ref_label_i <- cb_data$data[[i]]$ref_label + expect_true(!is.null(ref_label_i), + info = paste("ref_label missing for data element", i)) + expect_true(ref_label_i %in% c("X_ref", "LD", "No_ref", "individual"), + info = paste("Invalid ref_label for data element", i)) + } +}) + + +# ============================================================================ +# Test 14: X_ref with XtX_beta_cache in diagnostic output +# ============================================================================ + +test_that("X_ref model has XtX_beta_cache in diagnostic output", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 3 + ) + })) + + cb_model <- result$diagnostic_details$cb_model + for (i in seq_along(cb_model)) { + expect_true("XtX_beta_cache" %in% names(cb_model[[i]]), + info = paste("XtX_beta_cache missing for outcome", i)) + } +}) + + +# ============================================================================ +# Test 15: X_ref purity functions work correctly +# ============================================================================ + +test_that("purity functions dispatch correctly for X_ref", { + get_purity <- get("get_purity", envir = asNamespace("colocboost")) + get_between_purity <- get("get_between_purity", envir = asNamespace("colocboost")) + + set.seed(42) + n_ref <- 50 + p <- 20 + + sigma <- 0.9^abs(outer(1:p, 1:p, "-")) + X_ref <- MASS::mvrnorm(n_ref, rep(0, p), sigma) + X_ref <- Rfast::standardise(X_ref, center = TRUE, scale = TRUE) + X_ref[is.na(X_ref)] <- 0 + + LD <- cor(MASS::mvrnorm(200, rep(0, p), sigma)) + + # Test get_purity with X_ref + purity_xref <- get_purity(c(1, 2, 3), Xcorr = X_ref, ref_label = "X_ref") + expect_length(purity_xref, 3) + expect_true(all(purity_xref >= 0 & 
purity_xref <= 1)) + + # Test get_purity with LD + purity_ld <- get_purity(c(1, 2, 3), Xcorr = LD, ref_label = "LD") + expect_length(purity_ld, 3) + expect_true(all(purity_ld >= 0 & purity_ld <= 1)) + + # Test get_purity with No_ref + purity_noref <- get_purity(c(1, 2, 3), Xcorr = 1, ref_label = "No_ref") + expect_equal(purity_noref, c(0, 0, 0)) + + # Results should be similar between X_ref and LD (not exact due to different N) + expect_equal(purity_xref[1], purity_ld[1], tolerance = 0.3) + + # Test get_between_purity with X_ref + between_xref <- get_between_purity(c(1, 2), c(3, 4), Xcorr = X_ref, + miss_idx = NULL, P = p, ref_label = "X_ref") + expect_length(between_xref, 3) + expect_true(all(between_xref >= 0 & between_xref <= 1)) + + # Test get_between_purity with LD + between_ld <- get_between_purity(c(1, 2), c(3, 4), Xcorr = LD, + miss_idx = NULL, P = p, ref_label = "LD") + expect_length(between_ld, 3) + + # Test get_between_purity with No_ref + between_noref <- get_between_purity(c(1, 2), c(3, 4), Xcorr = 1, + miss_idx = NULL, P = p, ref_label = "No_ref") + expect_equal(between_noref, c(0, 0, 0)) +}) + + +# ============================================================================ +# Test 16: X_ref with get_robust_colocalization post-processing +# ============================================================================ + +test_that("get_robust_colocalization works with X_ref results", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + # Should not error + expect_error( + suppressMessages( + filtered <- get_robust_colocalization(result, cos_npc_cutoff = 0.2, npc_outcome_cutoff = 0.1) + ), + NA + ) + expect_s3_class(filtered, "colocboost") +}) + + +# ============================================================================ +# Test 17: X_ref with get_robust_ucos post-processing +# 
============================================================================ + +test_that("get_robust_ucos works with X_ref results", { + test_data <- generate_xref_test_data(n_ref = 500, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + skip_if(is.null(result$ucos_details), "No ucos detected") + + expect_error( + suppressMessages( + filtered <- get_robust_ucos(result, npc_outcome_cutoff = 0.1) + ), + NA + ) + expect_s3_class(filtered, "colocboost") +}) + + +# ============================================================================ +# Test 18: X_ref plotting works +# ============================================================================ + +test_that("colocboost_plot works with X_ref results", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + expect_error(suppressWarnings(colocboost_plot(result)), NA) + expect_error(suppressWarnings(colocboost_plot(result, y = "vcp")), NA) +}) + + +# ============================================================================ +# Test 19: X_ref with get_colocboost_summary +# ============================================================================ + +test_that("get_colocboost_summary works with X_ref results", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + summary1 <- get_colocboost_summary(result, summary_level = 1) + expect_type(summary1, "list") + + summary2 <- get_colocboost_summary(result, summary_level = 2) + expect_type(summary2, "list") +}) + + +# ============================================================================ +# Test 20: X_ref 
with HyPrColoc format input +# ============================================================================ + +test_that("X_ref works with effect_est and effect_se input", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + beta <- se <- matrix(0, 30, 2) + for (i in 1:2) { + for (j in 1:30) { + fit <- summary(lm(test_data$Y[, i] ~ test_data$X[, j]))$coef + if (nrow(fit) == 2) { beta[j, i] <- fit[2, 1]; se[j, i] <- fit[2, 2] } + } + } + rownames(beta) <- rownames(se) <- colnames(test_data$X) + + suppressWarnings(suppressMessages({ + result <- colocboost( + effect_est = beta, + effect_se = se, + effect_n = rep(200, 2), + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + expect_equal(result$data_info$n_outcomes, 2) +}) + + +# ============================================================================ +# Test 21: LD_jk functions dispatch correctly for X_ref +# ============================================================================ + +test_that("get_LD_jk and get_LD_jk1_jk2 dispatch correctly for X_ref", { + get_LD_jk <- get("get_LD_jk", envir = asNamespace("colocboost")) + get_LD_jk1_jk2 <- get("get_LD_jk1_jk2", envir = asNamespace("colocboost")) + + set.seed(42) + n_ref <- 50 + p <- 20 + + sigma <- 0.9^abs(outer(1:p, 1:p, "-")) + X_ref <- MASS::mvrnorm(n_ref, rep(0, p), sigma) + X_ref <- Rfast::standardise(X_ref, center = TRUE, scale = TRUE) + X_ref[is.na(X_ref)] <- 0 + + LD <- cor(MASS::mvrnorm(200, rep(0, p), sigma)) + + remain_idx <- 1:p + + # get_LD_jk with X_ref + ld_jk_xref <- get_LD_jk(5, XtX = X_ref, remain_idx = remain_idx, P = p, ref_label = "X_ref") + expect_length(ld_jk_xref, p) + expect_equal(ld_jk_xref[5], 1.0, tolerance = 0.01) # Self-correlation ~ 1 + + # get_LD_jk with LD + ld_jk_ld <- get_LD_jk(5, XtX = LD, remain_idx = remain_idx, P = p, ref_label = "LD") + expect_length(ld_jk_ld, p) + expect_equal(ld_jk_ld[5], 1.0, tolerance = 0.01) + + # get_LD_jk with No_ref + 
ld_jk_noref <- get_LD_jk(5, XtX = 1, remain_idx = remain_idx, P = p, ref_label = "No_ref") + expect_true(all(ld_jk_noref[remain_idx] == 1)) + + # get_LD_jk1_jk2 with X_ref + ld_pair_xref <- get_LD_jk1_jk2(1, 2, XtX = X_ref, remain_jk = remain_idx, ref_label = "X_ref") + expect_true(abs(ld_pair_xref) <= 1) + expect_true(abs(ld_pair_xref) > 0.5) # Adjacent SNPs with 0.9 LD should be high + + # get_LD_jk1_jk2 with LD + ld_pair_ld <- get_LD_jk1_jk2(1, 2, XtX = LD, remain_jk = remain_idx, ref_label = "LD") + expect_true(abs(ld_pair_ld) <= 1) + + # get_LD_jk1_jk2 with No_ref + ld_pair_noref <- get_LD_jk1_jk2(1, 2, XtX = 1, remain_jk = remain_idx, ref_label = "No_ref") + expect_equal(ld_pair_noref, 0) +}) + + +# ============================================================================ +# Test 22: ref_label is never NULL in internal cb_data after processing +# ============================================================================ + +test_that("ref_label is always explicitly set in cb_data, never NULL", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + # Build cb_data directly to inspect ref_label + sumstat <- test_data$sumstat + Z_list <- lapply(sumstat, function(s) s$beta / s$sebeta) + + # X_ref case + cb_data_xref <- colocboost_init_data( + X = NULL, Y = NULL, dict_YX = NULL, + Z = Z_list, LD = NULL, X_ref = list(test_data$X_ref), ref_label = "X_ref", + N_sumstat = lapply(sumstat, function(s) s$n[1]), + dict_sumstatLD = c(1, 1), Var_y = NULL, SeBhat = NULL, + keep_variables = lapply(sumstat, function(s) s$variant) + ) + for (i in seq_along(cb_data_xref$data)) { + expect_equal(cb_data_xref$data[[i]]$ref_label, "X_ref") + } + + # LD case + cb_data_ld <- colocboost_init_data( + X = NULL, Y = NULL, dict_YX = NULL, + Z = Z_list, LD = list(test_data$LD), X_ref = NULL, ref_label = "LD", + N_sumstat = lapply(sumstat, function(s) s$n[1]), + dict_sumstatLD = c(1, 1), Var_y = NULL, SeBhat = NULL, + keep_variables = lapply(sumstat, function(s) s$variant) + ) 
+ for (i in seq_along(cb_data_ld$data)) { + expect_equal(cb_data_ld$data[[i]]$ref_label, "LD") + } + + # Individual data case + Y_list <- lapply(1:2, function(l) as.matrix(test_data$Y[, l])) + X_list <- list(test_data$X, test_data$X) + cb_data_ind <- colocboost_init_data( + X = X_list, Y = Y_list, dict_YX = c(1, 2), + Z = NULL, LD = NULL, X_ref = NULL, ref_label = NULL, + N_sumstat = NULL, dict_sumstatLD = NULL, Var_y = NULL, SeBhat = NULL, + keep_variables = lapply(X_list, colnames) + ) + for (i in seq_along(cb_data_ind$data)) { + expect_equal(cb_data_ind$data[[i]]$ref_label, "individual") + } +}) \ No newline at end of file diff --git a/tests/testthat/test_inference.R b/tests/testthat/test_inference.R index 9c2c9d0..1b23c0f 100644 --- a/tests/testthat/test_inference.R +++ b/tests/testthat/test_inference.R @@ -790,6 +790,8 @@ generate_test_cb_obj_with_ucos <- function(n = 100, p = 20, L = 2, seed = 42) { dict_YX = c(1, 2), Z = NULL, LD = NULL, + X_ref = NULL, + ref_label = NULL, N_sumstat = NULL, dict_sumstatLD = NULL, Var_y = NULL, @@ -935,6 +937,8 @@ test_that("get_ucos_evidence handles individual-level data", { dict_YX = c(1, 2), Z = NULL, LD = NULL, + X_ref = NULL, + ref_label = NULL, N_sumstat = NULL, dict_sumstatLD = NULL, Var_y = NULL, @@ -1040,6 +1044,8 @@ test_that("get_ucos_evidence handles summary statistics data", { dict_YX = NULL, Z = Z_list, LD = list(LD_matrix), + X_ref = NULL, + ref_label = "LD", N_sumstat = lapply(sumstat_list, function(s) s$n[1]), dict_sumstatLD = c(1, 1), Var_y = NULL, diff --git a/tests/testthat/test_model.R b/tests/testthat/test_model.R index df998e8..fa28c1d 100644 --- a/tests/testthat/test_model.R +++ b/tests/testthat/test_model.R @@ -237,6 +237,7 @@ test_that("colocboost correctly maps focal outcome to keep_variables with dict_k dict_YX = dict_YX, Z = lapply(sumstat_list, function(s) s$beta / s$sebeta), # z-scores LD = list(LD_superset), + ref_label = "LD", N_sumstat = lapply(sumstat_list, function(s) s$n[1]), 
dict_sumstatLD = dict_sumstatLD, Var_y = NULL, diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index fb1b999..e62fcf9 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -571,6 +571,7 @@ test_that("colocboost_init_data handles complex dictionary mappings", { dict_YX = dict_YX, Z = Z_list, LD = LD_list, + ref_label = "LD", N_sumstat = N_sumstat, dict_sumstatLD = dict_sumstatLD, Var_y = NULL, diff --git a/vignettes/Input_Data_Format.Rmd b/vignettes/Input_Data_Format.Rmd index d1deba3..9a7aca5 100644 --- a/vignettes/Input_Data_Format.Rmd +++ b/vignettes/Input_Data_Format.Rmd @@ -59,6 +59,8 @@ head(Sumstat_5traits$sumstat[[1]]) - `LD` is a matrix of LD. This matrix does not need to contain the exact same variants as in `sumstat`, but the `colnames` and `rownames` of `LD` should include the `variant` names for proper alignment. +- `X_ref` (alternative to `LD`) is a reference panel genotype matrix (N_ref x P). When the number of variants is large, passing `X_ref` directly avoids storing the full P x P LD matrix. See [Summary Statistics Colocalization](https://statfungen.github.io/colocboost/articles/Summary_Level_Colocalization.html) for details. + The input format for multiple traits is similar, but `sumstat` should be a list of data frames `sumstat = list(sumstat1, sumstat2, sumstat3)`. 
The flexibility of input format for multiple traits is as follows (see detailed usage with different input formats, refer to [Summary Statistics Colocalization](https://statfungen.github.io/colocboost/articles/Summary_Level_Colocalization.html)): diff --git a/vignettes/Partial_Overlap_Variants.Rmd b/vignettes/Partial_Overlap_Variants.Rmd index e179fa4..b74d7f3 100644 --- a/vignettes/Partial_Overlap_Variants.Rmd +++ b/vignettes/Partial_Overlap_Variants.Rmd @@ -22,7 +22,7 @@ This vignette demonstrates how ColocBoost handles partial overlapping variants a library(colocboost) ``` -![](../man/figures/missing_representation.png) +![Illustration of partial overlapping variants across traits](../man/figures/missing_representation.png) diff --git a/vignettes/Summary_Statistics_Colocalization.Rmd b/vignettes/Summary_Statistics_Colocalization.Rmd index 3177a68..2dc127a 100644 --- a/vignettes/Summary_Statistics_Colocalization.Rmd +++ b/vignettes/Summary_Statistics_Colocalization.Rmd @@ -177,7 +177,31 @@ res$cos_details$cos$cos_index ``` -## 3.4. HyPrColoc compatible format: effect size and standard error matrices +## 3.4. Using a reference panel genotype matrix (X_ref) instead of LD + +When the number of variants P is very large, storing the full P x P LD matrix may be infeasible. +If you have the reference panel genotype matrix from which LD would be computed, you can pass it directly via `X_ref`. +ColocBoost will compute LD products on the fly, avoiding the P x P memory cost. + +This is beneficial when the reference panel sample size (N_ref) is less than the number of variants (P). +When N_ref >= P, ColocBoost automatically precomputes the LD matrix internally for efficiency. + +Provide either `LD` or `X_ref`, not both. The `dict_sumstatLD` dictionary works with `X_ref` the same way as with `LD`. 
+ +```{r x-ref-example} +# Use genotype matrix directly as reference panel +data("Ind_5traits") +X_ref <- Ind_5traits$X[[1]] + +# Run colocboost with X_ref instead of LD +res <- colocboost(sumstat = Sumstat_5traits$sumstat, X_ref = X_ref) + +# Identified CoS +res$cos_details$cos$cos_index +``` + + +## 3.5. HyPrColoc compatible format: effect size and standard error matrices ColocBoost also provides a flexibility to use HyPrColoc compatible format for summary statistics with and without LD matrix.