StatFunGen
diff --git a/‎DESCRIPTION‎
Lines changed: 3 additions & 3 deletions b/‎DESCRIPTION‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 3 additions & 3 deletions b/‎NAMESPACE‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎R/simulate_eQTL.R‎
Lines changed: 5 additions & 3 deletions b/‎R/simulate_eQTL.R‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎R/simulate_genotype.R‎
Lines changed: 5 additions & 5 deletions b/‎R/simulate_genotype.R‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎R/simulate_linreg.R‎
Lines changed: 56 additions & 0 deletions b/‎R/simulate_linreg.R‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎R/simulate_transQTL.R‎
Lines changed: 0 additions & 58 deletions b/‎R/simulate_transQTL.R‎
Lines changed: 0 additions & 58 deletions
diff --git a/‎_pkgdown.yml‎
Lines changed: 6 additions & 6 deletions b/‎_pkgdown.yml‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎inst/CITATION‎
Lines changed: 24 additions & 0 deletions b/‎inst/CITATION‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎man/generate_eqtl_data.Rd‎ ‎man/generate_cis_qtl_data.Rd‎man/generate_eqtl_data.Rd renamed to man/generate_cis_qtl_data.Rd
Lines changed: 8 additions & 5 deletions b/‎man/generate_eqtl_data.Rd‎ ‎man/generate_cis_qtl_data.Rd‎man/generate_eqtl_data.Rd renamed to man/generate_cis_qtl_data.Rd
Lines changed: 8 additions & 5 deletions
diff --git a/‎man/sim_geno_UKB.Rd‎
Lines changed: 0 additions & 26 deletions b/‎man/sim_geno_UKB.Rd‎
Lines changed: 0 additions & 26 deletions
@@ -9,10 +9,10 @@ Description: Provides a fast and memory-efficient method for
 URL: https://statfungen.github.io/simxQTL/, https://github.com/StatFunGen/simxQTL
 BugReports: https://github.com/StatFunGen/simxQTL/issues
 Authors@R: c(
-    person("Gao", "Wang", role = c("cre", "aut"), email = "wang.gao@columbia.edu"),
-    person("Alexander", "McCreight", role = "aut"),
+    person("Alexander", "McCreight", role = c("cre", "aut")),
     person("Xuewei", "Cao", role = "aut"),
-    person("Haochen", "Sun", role = "aut"))
+    person("Haochen", "Sun", role = "aut"),
+    person("Gao", "Wang", role = "aut", email = "wang.gao@columbia.edu"))
 License: MIT + file LICENSE
 Imports:
     matrixStats,
 
@@ -6,20 +6,20 @@ export(gene_data)
 export(gene_data_null1)
 export(gene_data_null2)
 export(gene_data_null3)
-export(generate_eqtl_data)
+export(generate_cis_qtl_data)
 export(get_correlation)
 export(get_lower_chol)
 export(get_random_A)
 export(parse_num_causal_snps)
 export(sim_beta)
 export(sim_beta_fix_variant)
 export(sim_geno_LD)
-export(sim_geno_UKB)
 export(sim_geno_indep)
+export(sim_geno_real)
 export(sim_multi_traits)
+export(sim_single_trait_simple)
 export(sim_sumstats)
 export(simulate_causal_config)
-export(simulate_cis_expression)
 export(simulate_polygenic_trait)
 export(simulate_trans_expression)
 export(simulate_trans_mixture_celltype)
 
@@ -166,12 +166,14 @@ is_causal_power <- function(G, beta, residual_variance, power = 0.80) {
 }
 
 ###############################################################################
-#' Generate eQTL Data with Multiple Genetic Architecture Components
+#' Generate Cis-QTL Data with Multiple Genetic Architecture Components
 #'
-#' This function generates simulated gene expression data with a partitioned
+#' This function generates simulated cis-eQTL data with a partitioned
 #' genetic architecture that enforces strict effect size hierarchies:
 #' |sparse| > |oligogenic| >> |infinitesimal|
 #'
+#' Originally developed for the "susieR 2.0" manuscript, McCreight et al. (2025).
+#'
 #' @param G Genotype matrix.
 #' @param h2g Total SNP heritability (proportion of variance explained by genotyped SNPs).
 #' @param prop_h2_sparse Proportion of h2g explained by sparse effects.
@@ -191,7 +193,7 @@ is_causal_power <- function(G, beta, residual_variance, power = 0.80) {
 #'   combined beta values, indices for each effect component, realized heritability estimates,
 #'   effect size ranges, hierarchy validation results, and causal indices.
 #' @export
-generate_eqtl_data <- function(G,
+generate_cis_qtl_data <- function(G,
                                h2g = 0.30,
                                prop_h2_sparse = 0.50,
                                prop_h2_oligogenic = 0.15,
 
@@ -63,19 +63,19 @@ sim_geno_LD <- function(n, LD, min_maf = 0.01, max_maf = 0.4, lambda = 1e-3, is.
   return(G)
 }
 
-#' Simulate Genotypes Based on UK Biobank
+#' Simulate Genotypes Based on Real Data
 #'
 #' @param n Sample size.
-#' @param file_path Path to the UK Biobank file.
+#' @param file_path Path to the genotype file.
 #' @param min_maf Minimum minor allele frequency.
 #' @param scale Logical, whether to scale the data.
 #' @return A matrix of genotypes.
 #' @examples
-#' sim_geno_UKB(n = 100, file_path = "path/to/UKB/file", min_maf = 0.01, scale = TRUE)
+#' \dontrun{
+#' sim_geno_real(n = 100, file_path = "path/to/genotypes.bed", min_maf = 0.01, scale = TRUE)
+#' }
 #' @export
-sim_geno_UKB <- function(n, file_path, min_maf = 0.01, scale = FALSE) {
+sim_geno_real <- function(n, file_path, min_maf = 0.01, scale = FALSE) {
   if (missing(n)) stop("Please provide the sample size")
-  if (is.null(file_path)) stop("Please provide the path of UK Biobank!")
+  if (is.null(file_path)) stop("Please provide the path to genotype data!")
   if (!grepl("\\.bed$", file_path)) stop("Please provide plink bfiles!")
 
   G <- process_ukb(file_path, n, min_maf)
 
@@ -414,4 +414,60 @@ calculate_sumstat <- function(X, Y){
         z = z
     )
     return(tb)
+}
+
+#' Simulate Single Trait with Simple Linear Model
+#'
+#' Simulates one or more traits (e.g., gene expression) from genotype data
+#' under an additive linear model. For each column \code{j} of \code{A}, the
+#' SNPs with \code{A[, j] == 1} are treated as causal. The first causal SNP
+#' gets effect size 1 and every other causal SNP gets the effect size that
+#' makes its variance contribution equal to that of the first SNP. Gaussian
+#' noise is then added so that the first causal SNP explains a fraction
+#' \code{phi_v} of the trait variance.
+#'
+#' If \code{phi_v} is too large for the causal SNPs of a trait (the implied
+#' residual variance is not positive), it is lowered in steps of 0.01 for
+#' that trait only, until the residual variance becomes positive.
+#'
+#' @param G A matrix of genotypes with dimensions n x p.
+#' @param A A binary adjacency matrix of dimensions p x m indicating direct
+#'   effects. Every column must contain at least one 1.
+#' @param phi_v The per-SNP heritability value; a single positive number.
+#'
+#' @return A matrix Y of dimensions n x m representing the simulated trait.
+#'   The function stops with an error if \code{phi_v} is not a single
+#'   positive finite number, if \code{A} does not have one row per column of
+#'   \code{G}, if a trait has no causal SNP, if a causal SNP has zero or
+#'   undefined variance, or if no positive per-SNP heritability yields a
+#'   positive residual variance.
+#'
+#' @examples
+#' n <- 1000
+#' p <- 40
+#' m <- 8
+#' G <- matrix(rbinom(n * p, size = 2, prob = 0.3), ncol = p)
+#' A <- matrix(sample(0:1, p * m, replace = TRUE), nrow = p)
+#' phi_v <- 0.05
+#' Y <- sim_single_trait_simple(G, A, phi_v)
+#'
+#' @export
+sim_single_trait_simple <- function(G, A, phi_v) {
+    if (!is.numeric(phi_v) || length(phi_v) != 1L ||
+        !is.finite(phi_v) || phi_v <= 0) {
+        stop("phi_v must be a single positive finite number", call. = FALSE)
+    }
+    if (nrow(A) != ncol(G)) {
+        stop("A must have one row per SNP (column) of G", call. = FALSE)
+    }
+
+    n <- nrow(G)
+    m <- ncol(A)
+    Y <- matrix(NA_real_, n, m)
+    # Smallest per-SNP heritability the step-down search may reach; guards
+    # against the search running past zero and never terminating.
+    phi_floor <- sqrt(.Machine$double.eps)
+
+    # seq_len() keeps the loop empty when A has no columns.
+    for (j in seq_len(m)) {
+        idx <- which(A[, j] == 1)
+        if (length(idx) == 0L) {
+            stop("Trait ", j, " has no causal SNP: column ", j,
+                 " of A contains no 1", call. = FALSE)
+        }
+
+        snp_var <- unname(apply(G[, idx, drop = FALSE], 2, var))
+        if (anyNA(snp_var) || any(snp_var == 0)) {
+            stop("Trait ", j, " has a causal SNP with zero or undefined ",
+                 "variance", call. = FALSE)
+        }
+
+        # The first causal SNP has beta = 1; the others are scaled so that
+        # beta^2 * var(SNP) matches the first SNP's contribution.
+        beta_idx <- sqrt(snp_var[1] / snp_var)
+        g <- drop(G[, idx, drop = FALSE] %*% beta_idx)
+        var_first <- snp_var[1]
+        var_g <- var(g)
+
+        # Work on a per-trait copy so that lowering the heritability for one
+        # trait does not change the value used for the remaining traits.
+        phi_j <- phi_v
+        sigma2 <- var_first / phi_j - var_g
+        while (sigma2 <= 0) {
+            phi_j <- phi_j - 0.01
+            if (phi_j <= phi_floor) {
+                stop("Trait ", j, ": no positive per-SNP heritability gives ",
+                     "a positive residual variance", call. = FALSE)
+            }
+            sigma2 <- var_first / phi_j - var_g
+        }
+
+        # Simulate trait (left unscaled)
+        Y[, j] <- g + rnorm(n, mean = 0, sd = sqrt(sigma2))
+    }
+
+    Y
+}
@@ -1,62 +1,4 @@
 
-#' Simulate cis gene expression
-#'
-#' This function simulates cis gene expression based on genotype data.
-#' It generates gene expression matrix E using genotype matrix G and adjacency matrix A.
-#'
-#' @param G A matrix of genotypes with dimensions n x p.
-#' @param A A binary adjacency matrix of dimensions p x m indicating direct effects.
-#' @param phi_v The per-SNP heritability value.
-#'
-#' @return A matrix E of dimensions n x m representing the simulated gene expression.
-#'
-#' @examples
-#' n <- 1000
-#' p <- 40
-#' m <- 8
-#' G <- matrix(rbinom(n * p, size = 2, prob = 0.3), ncol = p)
-#' A <- matrix(sample(0:1, p * m, replace = TRUE), nrow = p)
-#' phi_v <- 0.05
-#' E <- simulate_cis_expression(G, A, phi_v)
-#'
-#' @export
-simulate_cis_expression <- function(G, A, phi_v) {
-    n <- nrow(G)
-    m <- ncol(A)
-    E <- matrix(NA, n, m)
-    
-    for (j in 1:m) {
-        connected_snps <- which(A[, j] == 1)
-        num_snps <- length(connected_snps)
-        
-        beta <- rep(0, ncol(G))
-        for (i in connected_snps) {
-            if (i == connected_snps[1]) {
-                beta[i] <- 1
-            } else {
-                beta[i] <- sqrt(beta[connected_snps[1]]^2 * var(G[, connected_snps[1]]) / var(G[, i]))
-            }
-        }
-        
-        # Corrected calculation of variance_sum and sigma2_cis
-        variance_sum <- var(G[, connected_snps, drop = FALSE] %*% beta[connected_snps])
-        sigma2_cis <- var(G[, connected_snps[1]] * beta[connected_snps[1]]) / phi_v - variance_sum
-        while (sigma2_cis <= 0) {
-            phi_v <- phi_v - 0.01
-            sigma2_cis <- var(G[, connected_snps[1]] * beta[connected_snps[1]]) / phi_v - variance_sum
-        }
-        
-        # Simulate gene expression
-        E_tmp <- G %*% beta + rnorm(n, mean = 0, sd = sqrt(sigma2_cis))
-        # E[, j] <- scale(E_tmp)
-        E[, j] <- E_tmp
-    }
-    
-    return(E)
-}
-
-
-
 #' Simulate trans gene expression for different simulation scenarios
 #'
 #' This function can perform Type I error simulations or Accuracy and False Discovery Rate (ACC/FDR) simulations
 
@@ -50,7 +50,7 @@ reference:
     contents:
       - sim_geno_indep
       - sim_geno_LD
-      - sim_geno_UKB
+      - sim_geno_real
 
   - title: "Effect Size and Trait Simulation"
     desc: "Functions for simulating effect sizes and phenotypes"
@@ -60,6 +60,7 @@ reference:
       - sim_beta_fix_variant
       - simulate_polygenic_trait
       - sim_multi_traits
+      - sim_single_trait_simple
 
   - title: "Summary Statistics"
     desc: "Functions for simulating and calculating summary statistics"
@@ -68,15 +69,14 @@ reference:
       - calculate_sumstat
       - get_correlation
 
-  - title: "eQTL Simulation"
-    desc: "Functions for simulating eQTL data"
+  - title: "Cis-QTL Simulation"
+    desc: "Functions for simulating cis-eQTL data, originally developed for the \"susieR 2.0\" manuscript, McCreight et al. (2025)"
     contents:
-      - generate_eqtl_data
+      - generate_cis_qtl_data
 
   - title: "Trans-QTL Simulation"
-    desc: "Functions for simulating trans-QTL and gene regulatory networks"
+    desc: "Functions for simulating trans-QTL and gene regulatory networks, originally developed for the \"transBoost\" manuscript, Cao et al. (2026)"
     contents:
-      - simulate_cis_expression
       - simulate_trans_expression
       - simulate_trans_mixture_celltype
       - get_random_A
 
@@ -0,0 +1,24 @@
+# Citation entry for the manuscript this package's simulations were written
+# for. The journal field and textVersion describe a manuscript that is still
+# being submitted; update both once it is published.
+bibentry(
+  bibtype = "Article",
+  title = paste("SuSiE 2.0: improved methods and implementations for",
+                "genetic fine-mapping and phenotype prediction"),
+  author = c(
+    person("Alexander", "McCreight"),
+    person("Yanghyeon", "Cho"),
+    person("Daniel", "Nachun"),
+    person("Ruixi", "Li"),
+    person("Hao-Yu", "Gan"),
+    person("Matthew", "Stephens"),
+    person("Peter", "Carbonetto"),
+    # Middle initials belong to the given names; the family name is "Denault".
+    person(c("William", "R.P."), "Denault"),
+    person("Gao", "Wang")
+  ),
+  journal = "Submitting to Genome Biology",
+  year = "2025",
+  textVersion =
+    paste("Alexander McCreight, Yanghyeon Cho, Daniel Nachun, Ruixi Li,",
+          "Hao-Yu Gan, Matthew Stephens, Peter Carbonetto,",
+          "William R.P. Denault and Gao Wang (2025). SuSiE 2.0: improved",
+          "methods and implementations for genetic fine-mapping and",
+          "phenotype prediction. Submitting to Genome Biology")
+)