daisybio · alex-d13 · Feb 20, 2023 · Feb 20, 2023 · Feb 20, 2023 · Feb 20, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,13 @@
 *.log
 /target/
 
+.Rhistory
+data/
+mixed_model.Rout
+mixed_model.Rproj
+/.Rproj.user/
+
+
 .apt_generated
 .classpath
 .factorypath

diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,42 @@
+FROM ubuntu:20.04
+LABEL description="Image for DiMmer"
+LABEL maintainer="Alexander Dietrich"
+
+# needed so that R installation does not get stuck
+ENV DEBIAN_FRONTEND noninteractive
+
+# install system dependencies
+RUN apt-get update -y && apt-get --no-install-recommends --fix-broken install -y git \
+    wget \
+    vim \
+    software-properties-common \
+    dirmngr \
+    gdebi \
+    curl \
+    libicu-dev \ 
+    cmake
+
+# install JDK8 including JFX
+WORKDIR /opt
+ENV JDK_VERSION 11.0.16.1
+RUN wget https://cdn.azul.com/zulu/bin/zulu11.58.23-ca-jdk${JDK_VERSION}-linux_x64.tar.gz && tar -xzvf zulu11.58.23-ca-jdk${JDK_VERSION}-linux_x64.tar.gz && chmod 777 zulu11.58.23-ca-jdk${JDK_VERSION}-linux_x64/*
+ENV PATH=$PATH:/opt/zulu11.58.23-ca-jdk${JDK_VERSION}-linux_x64/bin
+
+# install R 4.2.1 https://docs.posit.co/resources/install-r/
+RUN curl -O https://cdn.rstudio.com/r/ubuntu-2004/pkgs/r-4.2.1_1_amd64.deb
+RUN gdebi r-4.2.1_1_amd64.deb -n
+RUN ln -s /opt/R/4.2.1/bin/R /usr/bin/R
+RUN ln -s /opt/R/4.2.1/bin/Rscript /usr/bin/Rscript
+
+# setup renv to handle R packages
+RUN R -e "install.packages(c('remotes','renv'), repos = c(CRAN = 'https://cloud.r-project.org'))"
+
+# install all packages
+COPY renv.lock renv.lock
+RUN R -e "renv::restore()"
+
+# download DiMmer jar
+WORKDIR /bin
+RUN wget https://github.com/baumbachlab/Dimmer/releases/download/2.3/dimmer.jar 
+
+CMD [ "java", "-jar", "/bin/dimmer.jar"]
diff --git a/dimmer_console.config b/dimmer_console.config
@@ -0,0 +1,247 @@
+
+
+##################### Settings without defaults ######################
+
+#Choose an output directory:
+output_path:
+
+#-------------------- For a new project ------------------------
+
+#Choose a sample annotation file:
+annotation_path:
+
+#Choose your variable of interest:
+#Only column names in your annotation file are accepted.
+variable:
+
+#-------------------- For an existing project ------------------
+
+#If an existing project is loaded, DiMmer will directly start the DMR search.
+
+#Select an existing DiMmer project file:
+dimmer_project_path:
+
+
+##################### General settings ###############################
+
+#Choose how many threads should be used for parallelization:
+#Only positive integers are accepted.
+threads: 4
+
+
+##################### Input and preprocessing settings ###############
+
+#Accepted values: {1: idat, 2: beta, 3: bisulfite}
+input_type: idat
+
+#-------------------- Settings for idat input type -------------
+
+#..... Preprocessing .....
+
+#Perform background correction:
+#Accepted values: {0: false, 1: true}
+background_correction: true
+
+#Perform probe quality filtering:
+#Accepted values: {0: false, 1: true}
+probe_filtering: true
+
+#..... Cell composition estimation .....
+
+#Only available for the "Regression" model and 450k chip.
+
+#Perform cell composition estimation.
+#Accepted values: {0: false, 1: true}
+cell_composition: false
+
+#Path to cell composition estimation files:
+cell_composition_path:
+
+#Select considered cell types:
+#Accepted values: {0: false, 1: true}
+cd8t: true
+cd4t: true
+nk: true
+ncell: true
+mono: true
+gran: true
+
+#-------------------- Settings for beta input type -------------
+
+#Select a beta-matrix file:
+beta_path:
+
+#Choose the array type, the beta-matrix originates from:
+#Accepted values: {1: 450k, 2: epic, 3: custom}
+array_type:
+
+#-------------------- Settings for bisulfite input type --------
+
+#Set the minimum number of reads mapped to a site.
+#If a site in a sample has less counts, it will be excluded from CpG statistics.
+#Only positive integers are accepted.
+min_reads: 10
+
+#Set the maximum allowed number of samples with a lower count than min_reads.
+#If a higher number of samples doesn't fulfill min_reads, the site will be removed.
+#Only positive integers are accepted.
+n_min_read_exceptions: 2
+
+#Set a minimum variance for the beta-values of a site.
+#Sites with a lower variance will be filtered out.
+#Only decimal numbers between 0 and 1 are accepted.
+min_variance: 0.0001
+
+
+##################### CpG statistics and permutations ################
+
+#Select a data type. 
+#Accepted values: {1: unpaired, 2: paired}
+data_type: unpaired
+
+#Set the number of permutations for empirical CpG p-value estimation:
+#Only positive integers are accepted.
+n_permutations_cpg: 1
+
+#Select a statistical model for CpG p-values.
+#Regression, rmANOVA and friedmanT only work on unpaired data
+#Accepted values: {1: Regression, 2: T-test, 3: mixedModel, 4: rmANOVA, 5: friedmanT}
+model: T-test
+
+#-------------------- Settings for T-test model ----------------
+
+#Only relevant, if the model is set to "T-test".
+
+#Alternative hypothesis:
+#Accepted values: {1: left, 2: right, 3: both}
+alternative_hypothesis: both
+
+#Assume, that both groups have equal variance:
+#Accepted values: {0: false, 1: true}
+assume_equal_variance: true
+
+#-------------------- Settings for Regression model ------------
+
+#Only relevant, if the model is set to "Regression".
+
+#Choose confounding variables.
+#Only column names in your annotation file are accepted.
+#Separate multiple column names with ", ". 
+confounding_variables:
+
+#-------------------- Settings for Mixed model ------------
+
+#Only relevant, if the model is set to "mixedModel".
+
+# CpGs with a global variance below this value will not be considered for a mixed model; a p-value of 0.99 will
+# be returned instead. This can be used to reduce runtime by skipping CpGs which are likely to be non-significant
+mm_variance_cutoff: 0.05
+
+#Set Formula for the Mixed Model (of form: fixed_effect+(1|random_effect). By default, the beta values will be used as dependent variable
+mm_formula: fixed_effect+(1|random_effect)
+
+#-------------------- Settings for Friedman model ------------
+
+#Only relevant, if the model is set to "friedmanT".
+
+# CpGs with a global variance below this value will not be considered for a Friedman Test model during permutation tests;
+# an empirical p-value of 0.99 will be returned instead. This can be used to reduce runtime for permutations by skipping
+# CpGs which are likely to be non-significant. The original p-value is calculated regardless of the variance.
+ft_variance_cutoff: 0.05
+
+# Set Formula for the Friedman Test (of form: timestamp|Patient_ID). By default, the beta values will be used as dependent variable
+ft_formula:  timestamp|Patient_ID
+
+#-------------------- Settings for repeated measures ANOVA model ------------
+
+#Only relevant, if the model is set to "rmANOVA".
+
+# CpGs with a global variance below this value will not be considered for a repeated measures ANOVA model
+# during permutation tests; an empirical p-value of 0.99 will be returned instead. This can be used to reduce runtime for
+# permutations by skipping CpGs which are likely to be non-significant. The original p-value is calculated regardless of
+# the variance.
+rma_variance_cutoff: 0.05
+
+# Set Formula for the repeated measures ANOVA (of form: timestamp+Error(Patient_ID))
+# or timestamp+Error(Patient_ID/timestamp))
+# When performing a two-way repeated measures Anova the formula is of the form:
+# timestamp*treatment+Error(Patient_ID/(timestamp*treatment))
+# For Imputation it is important that the timestamp (or corresponding) variable remains at the first position of this
+# formula
+# one option for more information: http://dwoll.de/rexrepos/posts/anovaMixed.html#one-way-repeated-measures-anova-rb-p-design
+rma_formula: timestamp+Error(Patient_ID/timestamp)
+
+
+##################### DMR search settings ############################
+
+#Select whether the program should execute the DMR search. 
+#If false, DiMmer will terminate after the CpG permutations.
+#Accepted values: {0: false, 1: true} 
+dmr_search: 1
+
+#Select if the program should pause before the DMR search. 
+#Some of the following variables might need information from the previous results to be set properly.
+#If the pause option is set, the program will pause and let you inspect the interim results. 
+#Then you have the option to refine the variables from the next section by hand via console inputs. 
+#Accepted values: {0: false, 1: true} 
+pause: 0
+
+#Set the maximum distance between CpGs in an island:
+#Only positive integers are accepted.
+max_cpg_dist: 1000
+
+#Set the window size for the DMR search:
+#Only positive integers are accepted.
+w_size: 5
+
+#Set the number of exceptions (number of not significantly diff. methylated CpGs allowed in the window):
+#Only non-negative integers are accepted.
+n_exceptions: 2
+
+#Set the p-value cutoff (CpGs with a lower value are will be considered as significantly diff. methylated):
+#Only decimal numbers between 0 and 1 are accepted.
+p_value_cutoff: 0.05
+
+#Set the minimum mean methylation difference:
+#If T-test is selected, the minimum mean methylation difference is an additional criterium for a CpG to count as significantly diff. methylated.
+#Only decimal numbers between 0 and 1 are accepted.
+min_diff: 0.0
+
+#Select, which p-value type should be used to check the significance of the CpGs.
+#Accepted values: {1: empirical, 2: original, 3: FWER, 4: FDR, 5: minP}
+p_value_type: original
+
+#Set the number of permutations to calculate the statistical significance of the DMRs:
+#Only positive integers are accepted.
+n_permutations_dmr: 1000
+
+#Set the number of random regions used to get a distribution for the p-value calculation:
+#Only positive integers are accepted.
+n_random_regions: 100000
+
+
+##################### Output settings ################################
+
+#-------------------- CpG statistics ---------------------------
+
+#Select whether the result plots of the permutation tests should be saved.
+#Accepted values: {0: false, 1: true}
+save_permu_plots: 1
+
+#Accepted values: {0: false, 1: true}
+save_beta: 1
+
+#-------------------- DMR search -------------------------------
+
+#Select whether the result plots of the DMR search should be saved.
+#Accepted values: {0: false, 1: true} 
+save_search_plots: 0
+
+#Select whether the result plots of the DMR permutations should be saved (one plot for every DMR size)
+#Accepted values: {0: false, 1: true} 
+save_dmr_permu_plots: 0
+
+#Select whether the result tables of the DMR search should be saved.
+#Accepted values: {0: false, 1: true} 
+save_search_tables: 1
+
diff --git a/pom.xml b/pom.xml
@@ -107,8 +107,8 @@
 			<version>11</version>
 			<classifier>win</classifier>
 		</dependency>
-		
-    	
+
+
 	</dependencies>
 
 	<build>