Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
d205b53
initialised workspace
seneschall Dec 10, 2025
ab3f1c8
added test data, dependencies
seneschall Dec 10, 2025
609a372
implemented wrapper; added test data
Dec 12, 2025
d59ec3c
worked on wrapper
seneschall Dec 18, 2025
5e2defa
fixed environment.yaml ; added test data
seneschall Dec 19, 2025
265399c
started adding params
seneschall Dec 22, 2025
26636a1
added test; appended meta.yaml; cleanup
seneschall Dec 23, 2025
df7be10
changed flag in test
seneschall Dec 23, 2025
c850121
removed testing pixi envs
seneschall Dec 23, 2025
88b7ccc
changed wrapper path
seneschall Dec 23, 2025
86efe3c
pinned environment; cleanup
seneschall Dec 23, 2025
aa73d2f
Merge branch 'master' into mofa2-wrapper
johanneskoester Jan 7, 2026
97a5512
Apply suggestion from @coderabbitai[bot]
johanneskoester Jan 7, 2026
8f20ee7
deleted requested files
seneschall Jan 8, 2026
4480162
changed params froms strings to native bools
seneschall Jan 8, 2026
c788d45
fixed assignment bug; cleaned up comments
seneschall Jan 8, 2026
fe651a1
changed handling of output
seneschall Jan 8, 2026
8072bd5
removed log file
seneschall Jan 8, 2026
762a4ea
removed dependencies; readded gitignore
seneschall Jan 12, 2026
af345bf
updated pinned environment
seneschall Jan 12, 2026
da1b6b0
Merge branch 'master' into mofa2-wrapper
fgvieira Jan 13, 2026
dc9ac50
removed .gitattributes
seneschall Jan 13, 2026
2aa6ed3
started working on subwrappers
seneschall Jan 27, 2026
3bb4dff
added functionality for multiple plots
seneschall Jan 30, 2026
eac269f
added test data
seneschall Jan 30, 2026
eb482c2
adding params
seneschall Jan 30, 2026
db39e37
cleanup
seneschall Jan 30, 2026
bd61cb5
started working on meta.yaml
seneschall Jan 30, 2026
15a040b
added notes to meta.yaml ; added test cases
Feb 4, 2026
e66771b
pinned environment
seneschall Feb 6, 2026
c92b290
fix to remove undesired output
seneschall Feb 6, 2026
34a23a6
Merge branch 'mofa2-subwrappers' into mofa2-wrapper
seneschall Feb 6, 2026
181b022
fixed issues
seneschall Feb 6, 2026
a679d95
changed notes to params in plotting/meta.yaml
seneschall Feb 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SCM syntax highlighting & preventing 3-way merges
pixi.lock merge=binary linguist-language=YAML linguist-generated=true
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ __pycache__
.idea
*~
docs/_build
docs/meta-wrappers
docs/meta-wrappers
# pixi environments
.pixi/*
!.pixi/config.toml
2 changes: 2 additions & 0 deletions bio/mofa2/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SCM syntax highlighting & preventing 3-way merges
pixi.lock merge=binary linguist-language=YAML linguist-generated=true -diff
3 changes: 3 additions & 0 deletions bio/mofa2/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# pixi environments
.pixi/*
!.pixi/config.toml
267 changes: 267 additions & 0 deletions bio/mofa2/environment.linux-64.pin.txt

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions bio/mofa2/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
channels:
- conda-forge
- bioconda
- nodefaults
dependencies:
- bioconductor-mofa2 =1.16.0
- r-base =4.4.3
- r-arrow =22.0.0
- mofapy2 =0.7.2
- python =3.14.2
26 changes: 26 additions & 0 deletions bio/mofa2/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
name: mofa2
description: |
Train a model on a multi-omic data set with default options.
url: https://www.bioconductor.org/packages/release/bioc/html/MOFA2.html
authors:
- Simon Sack
input:
- |
A parquet file in tidy format containing data with the headers: `sample, feature, view, group (optional), value`

`sample`: The name of the sample

`feature`: The name of the observed feature

`group` (optional, advanced): Discouraged for beginners. The aim of the multi-group framework is not to capture differential changes in mean levels between the groups (as for example when doing differential RNA expression). The goal is to compare the sources of variability that drive each group.

`value`: The observed value

`view`: The view the observed feature is grouped into
output:
- An HDF5-file with the trained model.
notes: |
In the params, set `scale_groups` and/or `scale_views` to `TRUE` if your groups/views
have different ranges/variances. This scales them to unit variance.
Both options default to `FALSE` if no params are given.
For all other training variables, this wrapper uses the default values.
12 changes: 12 additions & 0 deletions bio/mofa2/test/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Example rule: train a MOFA2 model from a tidy parquet table and store the
# trained model as an HDF5 file.
rule mofa2:
    input:
        "{data}.parquet",
    output:
        "{data}.hdf5",
    log:
        "log/{data}.log",
    params:
        # Native booleans; Snakemake passes them to the R wrapper as logicals.
        scale_groups=False,  # set to True if groups have different ranges/variances
        scale_views=False,  # set to True if views have different ranges/variances
    wrapper:
        "master/bio/mofa2"
Binary file added bio/mofa2/test/data.parquet
Binary file not shown.
29 changes: 29 additions & 0 deletions bio/mofa2/test/log/data.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
Creating MOFA object from a data.frame...

# Multi-group mode requested.

This is an advanced option, if this is the first time that you are running MOFA, we suggest that you try do some exploration first without specifying groups. Two important remarks:

- The aim of the multi-group framework is to identify the sources of variability *within* the groups. If your aim is to find a factor that 'separates' the groups, you DO NOT want to use the multi-group framework. Please see the FAQ on the MOFA2 webpage.

- It is important to account for the group effect before selecting highly variable features (HVFs). We suggest that either you calculate HVFs per group and then take the union, or regress out the group effect before HVF selection
Checking data options...
Checking training options...
Checking model options...
Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)...
Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'

10 factors were found to explain no variance and they were removed for downstream analysis. You can disable this option by setting load_model(..., remove_inactive_factors = FALSE)
Trained MOFA with the following characteristics:
Number of views: 2
Views names: view_0 view_1
Number of features (per view): 1000 1000
Number of groups: 2
Groups names: group_0 group_1
Number of samples (per group): 100 100
Number of factors: 5

Warning message:
In run_mofa(mofa_object, outfile, ) :
The latest mofapy2 version is 0.7.0, you are using 0.7.2. Please upgrade with 'pip install mofapy2'
24 changes: 24 additions & 0 deletions bio/mofa2/test/log/microbiome.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Creating MOFA object from a data.frame...
Checking data options...
Checking training options...
Checking model options...
Warning message:
In prepare_mofa(object = mofa_object, data_options = data_opts, :
The total number of samples is very small for learning 15 factors.
Try to reduce the number of factors to obtain meaningful results. It should not exceed ~14.
Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)...
Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'

Trained MOFA with the following characteristics:
Number of views: 3
Views names: Bacteria Fungi Viruses
Number of features (per view): 180 18 42
Number of groups: 1
Groups names: single_group
Number of samples (per group): 59
Number of factors: 15

Warning message:
In run_mofa(mofa_object, outfile, ) :
The latest mofapy2 version is 0.7.0, you are using 0.7.2. Please upgrade with 'pip install mofapy2'
Binary file added bio/mofa2/test/microbiome.parquet
Binary file not shown.
68 changes: 68 additions & 0 deletions bio/mofa2/wrapper.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/R

# load libraries
library(MOFA2)
library(arrow)

# Snakemake wrapper body: read a tidy parquet table, build a MOFA object,
# apply the optional `scale_groups` / `scale_views` params, train the model
# and write it to the rule's first output file.

# If a log file is provided, redirect regular output and messages to it.
# This is done first so that failures in the setup steps below are logged too.
log_con <- NULL
if (length(snakemake@log) > 0) {
  log_con <- file(snakemake@log[[1]], open = "wt")
  sink(log_con)
  sink(log_con, type = "message")
}

# Connect reticulate to the active conda environment. Skipped when
# CONDA_PREFIX is unset, in which case reticulate falls back to its own
# Python discovery.
conda_prefix <- Sys.getenv("CONDA_PREFIX")
if (nzchar(conda_prefix)) {
  reticulate::use_condaenv(conda_prefix)
}

# Read an optional boolean flag from the Snakemake params.
#
# params:  the list of rule params (`snakemake@params`).
# name:    name of the param to look up.
# default: value returned when the param was not given.
#
# Accepts native logicals as well as the spellings understood by
# `as.logical()` ("TRUE", "true", "True", "T", ...). Any other value is an
# error instead of being silently ignored.
read_flag <- function(params, name, default) {
  if (!(name %in% names(params))) {
    return(default)
  }
  value <- params[[name]]
  flag <- if (is.atomic(value) && length(value) == 1L) {
    as.logical(value)
  } else {
    NA
  }
  if (is.na(flag)) {
    stop(
      "Parameter `", name, "` must be TRUE or FALSE, got: ",
      paste(format(value), collapse = ", "),
      call. = FALSE
    )
  }
  flag
}

# Load the long-format data frame from the parquet file. Expected headers:
# `sample, feature, view, group (optional), value`

# cast input path as character to avoid errors
input_path <- as.character(snakemake@input[[1]])
long_data <- read_parquet(input_path)

mofa_object <- create_mofa(long_data)

data_opts <- get_default_data_options(mofa_object)
model_opts <- get_default_model_options(mofa_object)
train_opts <- get_default_training_options(mofa_object)

# Data options settable via params: scale_groups, scale_views.
# When a param is absent, the MOFA2 default is kept.
data_opts$scale_groups <- read_flag(
  snakemake@params, "scale_groups", data_opts$scale_groups
)
data_opts$scale_views <- read_flag(
  snakemake@params, "scale_views", data_opts$scale_views
)

# Model and training options (e.g. maxiter, convergence_mode, gpu_mode,
# verbose) are not exposed as params yet; the defaults are used.

mofa_object <- prepare_mofa(
  object = mofa_object,
  data_options = data_opts,
  model_options = model_opts,
  training_options = train_opts
)

# Hand run_mofa() an absolute path. Relative outputs are resolved against
# the working directory; outputs that are already absolute are left alone
# (prefixing them with getwd() would point to a non-existent location).
out_path <- path.expand(as.character(snakemake@output[[1]]))
is_absolute <- grepl("^(/|[A-Za-z]:[/\\\\])", out_path)
outfile <- if (is_absolute) out_path else file.path(getwd(), out_path)

# train the MOFA model and write the result to `outfile`
run_mofa(mofa_object, outfile)

# Restore the default output streams and release the log file.
if (!is.null(log_con)) {
  sink(type = "message")
  sink()
  close(log_con)
}
21 changes: 16 additions & 5 deletions test_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ def _run(wrapper, cmd, check_log=None, compare_results_with_expected=None):
f"file://{tmp_test_subdir}/",
]


if CONTAINERIZED:
# run snakemake in container
cmd = [
Expand All @@ -129,9 +128,7 @@ def _run(wrapper, cmd, check_log=None, compare_results_with_expected=None):
with open(generated) as genf, open(expected) as expf:
gen_lines = genf.readlines()
exp_lines = expf.readlines()
diff = "".join(
difflib.Differ().compare(gen_lines, exp_lines)
)
diff = "".join(difflib.Differ().compare(gen_lines, exp_lines))
raise ValueError(
f"Unexpected results: {generated} != {expected}."
f"Diff:\n{diff}"
Expand Down Expand Up @@ -271,9 +268,19 @@ def test_agat(run):
def test_alignoth(run):
run(
"bio/alignoth",
["snakemake", "--cores", "1", "--use-conda", "-F", "out/json_plot.vl.json", "out/plot.html", "output-dir/"],
[
"snakemake",
"--cores",
"1",
"--use-conda",
"-F",
"out/json_plot.vl.json",
"out/plot.html",
"output-dir/",
],
)


def test_alignoth_report_meta(run):
run(
"meta/bio/alignoth_report",
Expand Down Expand Up @@ -7165,3 +7172,7 @@ def test_orthanq(run):
"out/calls_virus",
],
)


def test_mofa2(run):
run("bio/mofa2", ["snakemake", "--cores", "1", "data.hdf5", "--use-conda", "-F"])