Skip to content

Commit 0354269

Browse files
committed
initial commit
0 parents  commit 0354269

12 files changed

+428
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
.Rhistory
2+
*.bak

.perltidyrc

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# modified from DAGOLDEN .perltidyrc file
2+
3+
-se # Errors to STDERR
4+
5+
-l=85 # Max line width target
6+
-vmll # variable maximum line length
7+
-wc=10 # depth to reduce indentation levels
8+
-i=2 # Indent level
9+
-ci=2 # Continuation
10+
11+
-vt=0 # vertical tightness
12+
-cti=0 # extra indentation for closing brackets
13+
-vtc=0 # close parens on own line if possible
14+
15+
-nsot # stack opening
16+
-nsct # stack closing
17+
18+
-notr # opening tokens on right of a line
19+
-pt=1 # parenthesis tightness
20+
-bt=1 # brace tightness
21+
-sbt=1 # square bracket tightness
22+
-bbt=0 # block brace tightness
23+
-cab=1
24+
25+
-nsfp # no space after function
26+
-nsfs # No space before semicolons in for loops
27+
28+
-nolq # Don't outdent long quoted strings
29+
-nola # Don't outdent labels
30+
-nolc # Don't outdent long comments
31+
-nokw # Don't outdent keywords
32+
-nhsc # Don't expect hanging side comments
33+
-nbbc # No blank before comments
34+
-tso # Tight secret operators
35+
36+
-msc=1 # Space to side comment
37+
38+
-wbb="% + - \* / x != == >= <= =~ !~ < > | &" # Break before all operators except assignment
39+
40+
-ole=unix # line endings

.prettierrc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"endOfLine": "lf",
3+
"printWidth": 80,
4+
"proseWrap": "always",
5+
"tabWidth": 2,
6+
"useTabs": false
7+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
args <- commandArgs(trailingOnly = T)
2+
library(tidyverse)
3+
4+
# gemma assoc is args[1];
5+
# lmm output
6+
# chr rs ps n_miss allele1 allele0 af beta se logl_H1 l_remle p_wald
7+
assoc <- read.table(args[1], header = T, as.is = T)
8+
9+
## allele list is args[2]; no header; first 3 columns (SNP,A1,A2)
10+
allele <- read.table(args[2], as.is = T)
11+
colnames(allele) <- c("SNP", "A1.tomatch", "A2.tomatch")
12+
13+
# plink .fam file is args[3]; only to get sample size N
14+
fam <- read.table(args[3], as.is = T)
15+
fam[fam[, 6] == -9, 6] <- NA
16+
N <- sum(!is.na(fam[, 6]))
17+
18+
output <- args[4]
19+
20+
this_asso <- inner_join(assoc, allele, by = c("rs" = "SNP"))
21+
# write.table(this_asso,"tep.file")
22+
# assoc[,c("rs","beta","se")]
23+
this_asso$Beta.tomatch <- ifelse(this_asso$allele1 == this_asso$A1.tomatch & this_asso$allele0 == this_asso$A2.tomatch,
24+
this_asso$beta, ifelse(this_asso$allele0 == this_asso$A1.tomatch & this_asso$allele1 == this_asso$A2.tomatch,
25+
-this_asso$beta, NA
26+
)
27+
)
28+
# colnames(summ)=c("SNP","Beta","Se")
29+
this_asso$Z <- this_asso$Beta.tomatch / this_asso$se
30+
this_asso$N <- N - this_asso$n_miss
31+
this_asso <- this_asso %>% select(rs, Beta.tomatch, se, Z, N)
32+
colnames(this_asso) <- c("SNP", "Beta", "Se", "Z", "N")
33+
34+
# rm NA
35+
this_asso <- this_asso[!is.na(this_asso$Beta), ]
36+
# keep SNP uniq
37+
# this_asso=this_asso[!duplicated(this_asso$SNP),]
38+
39+
write.table(this_asso, output, row.names = F, col.names = T, quote = F)

MESuSiE/MESuSiE_run_from_lmm.R

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# setwd("/Users/yueliu/temp/temp1/Project_Diversity_pQTL/finemapping/MESuSIE/ENSG00000172803")
2+
args <- commandArgs(trailingOnly = T)
3+
library(tidyverse)
4+
library(MESuSiE)
5+
6+
# args[1]="plink.race.list"
7+
race <- read.table(args[1], as.is = T)$V1
8+
if (length(race) < 2) {
9+
cat("fewer than two races, quit\n")
10+
q()
11+
}
12+
13+
if (length(args) < 2) {
14+
output <- "MESuSiE_res"
15+
} else {
16+
output <- args[2]
17+
}
18+
19+
bim.suf <- ".mind005.match_allele.bim"
20+
ld.suf <- ".mind005.match_allele.square.ld"
21+
asso.suf <- ".lmm.assoc.txt.match_allele"
22+
23+
24+
summ_stat_list <- list()
25+
LD_list <- list()
26+
27+
# snp list is the snp list in all files
28+
# take first race as the beginning snp list
29+
snp.list <- read.table(paste0(race[1], bim.suf), as.is = T)$V2
30+
# add ld to LD_list, and add assoc to summ_stat_list
31+
for (r in race) {
32+
this_bim <- read.table(paste0(r, bim.suf), as.is = T)$V2
33+
this_ld <- read.table(paste0(r, ld.suf), as.is = T)
34+
rownames(this_ld) <- this_bim
35+
colnames(this_ld) <- this_bim
36+
snp.list <- intersect(snp.list, this_bim)
37+
LD_list[[r]] <- this_ld
38+
39+
this_asso <- read.table(paste0(r, asso.suf), header = T, as.is = T)
40+
rownames(this_asso) <- this_asso$SNP
41+
snp.list <- intersect(snp.list, this_asso$SNP)
42+
summ_stat_list[[r]] <- this_asso
43+
}
44+
45+
# LD_list[["AA"]][1:5,1:5]
46+
# summ_stat_list[["AA"]][1:5,]
47+
48+
# subset LD_list and summ_stat_list to snp.list
49+
for (r in race) {
50+
LD_list[[r]] <- as.matrix(LD_list[[r]][snp.list, snp.list])
51+
summ_stat_list[[r]] <- summ_stat_list[[r]][snp.list, ]
52+
}
53+
54+
MESuSiE_res <- meSuSie_core(LD_list, summ_stat_list, L = 10)
55+
# MESuSiE_res.NHW_AA=meSuSie_core(LD_list[c("AA","NHW")],
56+
# summ_stat_list[c("AA","NHW")],L=10)
57+
58+
59+
save(LD_list, summ_stat_list, race, snp.list, MESuSiE_res, file = paste0(output, ".RDat"))
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
dirlist=$1
2+
3+
pwd=$(pwd)
4+
for f in $(less $dirlist); do
5+
cd $f
6+
Rscript ~/bin/MESuSiE_run_from_lmm.R plink.race.list > MESuSiE_run.log
7+
cd $pwd
8+
done
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
$fam_path = shift;
2+
$out_dir = shift;
3+
4+
if ( !$out_dir ) {
5+
$out_dir = "multi_out";
6+
}
7+
8+
$lmm_suf = shift; #lmm suf
9+
if ( !$lmm_suf ) {
10+
$lmm_suf = ".gemma.lmm.assoc.txt";
11+
}
12+
13+
open( FP, "$fam_path" );
14+
while (<FP>) {
15+
if (/(\S+)\s+(\S+)/) {
16+
$path{$2} = `readlink -f $1`;
17+
chomp( $path{$2} );
18+
}
19+
}
20+
21+
foreach $r ( sort keys %path ) {
22+
# print "$r\t$path{$r}\n";
23+
24+
$this_path = $path{$r};
25+
if ( $this_path =~ /(\S+)\// ) {
26+
$root = $1;
27+
}
28+
open( TH, "$this_path" );
29+
while (<TH>) {
30+
if (/((\S+\/(\S+?))\.\S+)\.fam/) {
31+
$prefix = $1;
32+
$lmm_prefix = $2;
33+
$gene = $3;
34+
#$bed=$prefix.".bed";
35+
#$bim=$prefix.".bim";
36+
#$fam=$prefix.".fam";
37+
$plink{$gene}{$r} = $root . "/$prefix";
38+
$lmm{$gene}{$r} = $root . "/$lmm_prefix" . $lmm_suf;
39+
}
40+
}
41+
}
42+
43+
foreach $g ( sort keys %plink ) {
44+
`mkdir -p $out_dir/$g`;
45+
chdir("$out_dir/$g");
46+
open( PL, ">plink.race.list" );
47+
open( PR, ">plink.race" );
48+
open( PN, ">plink.race.num" );
49+
$n = 0;
50+
foreach $r ( sort keys %{ $plink{$g} } ) {
51+
$check_bed = $plink{$g}{$r} . ".bed";
52+
if ( !-e $check_bed ) {
53+
print STDERR "no $check_bed\n";
54+
next;
55+
}
56+
57+
`ln -s $plink{$g}{$r}".bed" "$r.bed"`;
58+
`ln -s $plink{$g}{$r}".bim" "$r.bim"`;
59+
`ln -s $plink{$g}{$r}".fam" "$r.fam"`;
60+
61+
`ln -s $lmm{$g}{$r} "$r.lmm.assoc.txt"`;
62+
63+
print PL "$r\n";
64+
print PR "$r\t$r\n";
65+
$n++;
66+
$race = $r;
67+
68+
}
69+
if ( !$n ) {
70+
print STDERR "no data for $g; omit this gene \n";
71+
chdir("../../");
72+
`rm -rf $out_dir/$g`;
73+
next;
74+
}
75+
print PN "$n\n";
76+
77+
#use the last race bim for match alleles
78+
`ln -s $race.bim bim_for_match_alleles`;
79+
80+
chdir("../../");
81+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
dirlist=$1
2+
3+
pwd=$(pwd)
4+
for f in $(less $dirlist); do
5+
cd $f
6+
7+
#double check missingness; require mind 0.05
8+
for r in $(less plink.race.list); do
9+
plink --bfile $r --mind 0.05 --geno 0.05 --out $r.mind005 --make-bed
10+
done
11+
12+
#get match alleles
13+
#remove ambiguous alleles (A/T and G/C)
14+
awk '{if ( ! ( ($5 == "A" && $6 == "T") || ($5 == "T" && $6 == "A") || ($5 == "G" && $6 == "C") || ($5 == "C" && $6 == "G") ) ) { print $2,$5,$6}}' bim_for_match_alleles > match.alleles.A1_A2
15+
awk '{print $1,$2}' match.alleles.A1_A2 > match.alleles
16+
17+
#new plink files that match alleles
18+
for r in $(less plink.race.list); do
19+
plink --bfile $r.mind005 --extract match.alleles --reference-allele match.alleles --make-bed --out $r.mind005.match_allele
20+
done
21+
22+
#ld matrix
23+
for r in $(less plink.race.list); do
24+
plink --bfile $r.mind005.match_allele --r square --out $r.mind005.match_allele.square --reference-allele match.alleles
25+
done
26+
27+
#match allele to lmm assoc
28+
for r in $(less plink.race.list); do
29+
Rscript ~/bin/MESuSiE_prepare_assoc_match_allele_from_lmm.R $r.lmm.assoc.txt match.alleles.A1_A2 $r.fam $r.lmm.assoc.txt.match_allele
30+
done
31+
32+
cd $pwd
33+
done

MESuSiE_notes.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
====Run MESuSiE in each gene====
2+
#the ld matrix is produced in-samples, i.e not from external panels
3+
#thus, ld-mismatch-checking step is not needed here.
4+
#in /home/yliu/work_dir/ProteinPrediction/Diversity_pQTL/run.11012023/finemap/MESuSiE
5+
1) on cluster: prepare plink files in each gene using lmm assoc results from all 3 populations
6+
qsub2 "perl ~/bin/multi_ancestry_prepare_files_for_MESuSiE.pl NHW_AA_Hisp.fam.path "
7+
2) on clusters: match allele; ld matrix; match allele for lmm assoc in each gene
8+
ls -d multi_out/ENSG* >all.dir.list
9+
for f in $(less all.dir.list); qsub2 "~/bin/multi_ancestry_prepare_files_for_MESuSiE_ld_and_asso.match.sh $f";done
10+
3) on clusters: run MESuSie for each gene
11+
for f in $(less all.dir.list); qsub2 "~/bin/MESuSiE_run_from_lmm_loopdir.match.sh $f";done

Makefile

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Directories to exclude from processing (space-separated)
2+
EXCLUDE_DIRS := .git
3+
4+
# Convert to find-compatible format
5+
FIND_EXCLUDE := $(foreach dir,$(EXCLUDE_DIRS),-path "./$(dir)" -prune -o)
6+
7+
.PHONY: format all check-deps show clean help
8+
9+
default: help
10+
11+
# Check if tools are installed and give installation instructions
12+
check-deps:
13+
@echo "Checking dependencies..."
14+
@if ! which perltidy >/dev/null 2>&1; then \
15+
echo "❌ perltidy not found"; \
16+
echo " Install with: brew install perltidy OR cpm install -g Perl::Tidy"; \
17+
else \
18+
echo "✅ perltidy found"; \
19+
fi
20+
@if ! which Rscript >/dev/null 2>&1; then \
21+
echo "❌ Rscript not found"; \
22+
echo " Install R from: https://r-project.org OR brew install r"; \
23+
else \
24+
echo "✅ Rscript found"; \
25+
fi
26+
@if ! which prettier >/dev/null 2>&1; then \
27+
echo "❌ prettier not found"; \
28+
echo " Install with: npm install -g prettier OR brew install prettier"; \
29+
else \
30+
echo "✅ prettier found"; \
31+
fi
32+
@if ! which shfmt >/dev/null 2>&1; then \
33+
echo "❌ shfmt not found"; \
34+
echo " Install with: brew install shfmt OR go install mvdan.cc/sh/v3/cmd/shfmt@latest"; \
35+
else \
36+
echo "✅ shfmt found"; \
37+
fi
38+
@echo ""
39+
@echo "Please manually check R::styler is installed."
40+
@echo " To install: Rscript src/install_r_packages.R styler"
41+
@echo ""
42+
43+
format:
44+
@echo "Formatting files..."
45+
@if which perltidy >/dev/null 2>&1; then \
46+
find . $(FIND_EXCLUDE) \( -name "*.pl" -o -name "*.pm" -o -name "*.t" \) -print0 | xargs -0 perltidy --pro=.perltidyrc -b; \
47+
else \
48+
echo "⚠️ Skipping Perl files (perltidy not installed - run 'make check-deps')"; \
49+
fi
50+
@if which Rscript >/dev/null 2>&1; then \
51+
if Rscript -e "if (!requireNamespace('styler', quietly=TRUE)) quit(status=1)" 2>/dev/null; then \
52+
find . $(FIND_EXCLUDE) -name "*.R" -print0 | xargs -0 -I {} Rscript -e "styler::style_file('{}')"; \
53+
else \
54+
echo "⚠️ Skipping R files (styler package not installed)"; \
55+
fi; \
56+
else \
57+
echo "⚠️ Skipping R files (Rscript not installed - run 'make check-deps')"; \
58+
fi
59+
@if which prettier >/dev/null 2>&1; then \
60+
find . $(FIND_EXCLUDE) \( -name "*.md" -o -name "*.markdown" \) -print0 | xargs -0 prettier --write; \
61+
else \
62+
echo "⚠️ Skipping markdown files (prettier not installed - run 'make check-deps')"; \
63+
fi
64+
@if which shfmt >/dev/null 2>&1; then \
65+
find . $(FIND_EXCLUDE) \( -name "*.sh" -o -name "*.bash" \) -print0 | xargs -0 shfmt -w -i 2 -ci -sr; \
66+
else \
67+
echo "⚠️ Skipping shell files (shfmt not installed - run 'make check-deps')"; \
68+
fi
69+
70+
show:
71+
@echo "Files that would be processed:"
72+
@echo "Excluded directories: $(EXCLUDE_DIRS)"
73+
@echo ""
74+
@echo "Perl files:"
75+
@find . $(FIND_EXCLUDE) \( -name "*.pl" -o -name "*.pm" -o -name "*.t" \) -print | head -5
76+
@echo "R files:"
77+
@find . $(FIND_EXCLUDE) -name "*.R" -print | head -5
78+
@echo "Markdown files:"
79+
@find . $(FIND_EXCLUDE) \( -name "*.md" -o -name "*.markdown" \) -print | head -5
80+
@echo "Shell files:"
81+
@find . $(FIND_EXCLUDE) \( -name "*.sh" -o -name "*.bash" \) -print | head -5
82+
83+
clean:
84+
@find . $(FIND_EXCLUDE) -name "*.bak" -print0 | xargs -0 rm -f
85+
86+
help:
87+
@echo "Available targets:"
88+
@echo " check-deps Check if required tools are installed"
89+
@echo " format Format all code files"
90+
@echo " show Show files that would be processed"
91+
@echo " clean Remove backup files"
92+
@echo " all Run format"
93+
@echo " help Show this help"
94+
@echo ""
95+
@echo "Excluded directories: $(EXCLUDE_DIRS)"

0 commit comments

Comments
 (0)