3 changes: 3 additions & 0 deletions .gitignore
@@ -1 +1,4 @@
.nf-env
__pycache__/
*.pyc
*.pyo
7 changes: 7 additions & 0 deletions custom_files/group_path_map.yaml
@@ -0,0 +1,7 @@
group_paths:
  infotech:
    scratch: "/data/scratch/DCO/DIGOPS/SCIENCOM/{username}"
    rds: "/data/rds/DIT/SCICOM/SCRSE"
  default:
    scratch: "/scratch/{username}/{group}"
    rds: "/rds/general/user/{username}/home"
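For illustration, here is how these templates resolve (the username and the non-infotech group are made-up values; the actual lookup is done by `get_scratch_rds_path` in `shared/helpers.py`, added later in this PR):

```text
# group "infotech" for user jdoe:
#   scratch -> /data/scratch/DCO/DIGOPS/SCIENCOM/jdoe
#   rds     -> /data/rds/DIT/SCICOM/SCRSE
# unknown group "imaging" falls back to "default":
#   scratch -> /scratch/jdoe/imaging
#   rds     -> /rds/general/user/jdoe/home
```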
16 changes: 2 additions & 14 deletions index.py
@@ -6,17 +6,5 @@
    layout="wide",
    initial_sidebar_state="auto",
)


def main():
    p1 = st.Page("views/login.py")
    p2 = st.Page("views/run_pipeline.py")
    nav_pages = [p1, p2]
    pages = {"p1": p1, "p2": p2}
    st.session_state["pages"] = pages
    pg = st.navigation(nav_pages)
    pg.run()


if __name__ == "__main__":
    main()
# redirect directly to login page
st.switch_page("pages/login.py")
52 changes: 12 additions & 40 deletions views/login.py → pages/login.py
@@ -1,38 +1,22 @@
import pandas as pd
import streamlit as st

import shared.sessionstate as ss
import tabs.tab_logon as tl
from shared.sessionstate import retrieve_all_from_ss, ss_set
from shared.visual import header

header = """
<span style=
"color:darkred;font-size:40px;"> -🍃 </span><span style=
"color:green;font-size:40px;">RUN NEXTFLOW on ALMA</span><span style=
"color:darkred;font-size:40px;">🍃- </span>
"""
st.markdown(header, unsafe_allow_html=True)

st.write("--- ")
st.title("🔑 Login Page")

header()
st.write("## Login")
st.write("Login to your alma account before running a nextflow pipeline.")

OK, MY_SSH, username = tl.tab()

ss.ss_set("LOGIN_OK", OK)
ss.ss_set("MY_SSH", MY_SSH)
ss.ss_set("user_name", username)
tl.tab()


# I want to move between tabs automatically
# move between tabs
def display():
    st.session_state["run_pipeline"] = True
    if st.session_state.get("run_pipeline", False) and "login" in st.session_state:
        if "pages" in st.session_state:
            page = st.session_state["pages"].get("p2", None)
            if page:
                st.switch_page(page)
ss_values = retrieve_all_from_ss()
OK = ss_values["OK"]
username = ss_values["user_name"]
GROUP = ss_values["GROUP"]


def display_restricted_access(username):
@@ -54,30 +38,18 @@ def display_restricted_access(username):
)


def update_session_info(group, cost_account):
    ss.ss_set("user_group", group)
    ss.ss_set("user_cost_account", cost_account)


def check_whiteList(username):
    whitelist = "custom_files/user_whitelist.tsv"
    df = pd.read_csv(whitelist, delimiter="\t")
    row = df.loc[df["username"] == username]
    if row.empty:  # user not on the whitelist
        return False

    # update session info
    update_session_info(row["group"], row["account-code"])
    ss_set("user_group", row["group"])
    ss_set("user_cost_account", row["account-code"])
    return True
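`check_whiteList` expects a tab-separated file with at least the three columns read above. A hypothetical `custom_files/user_whitelist.tsv`, with invented rows for illustration only:

```text
username	group	account-code
jdoe	infotech	ACC-0001
asmith	imaging	ACC-0002
```

Note that `row` is a one-row DataFrame selection, so `row["group"]` is a pandas Series; storing plain strings in the session would need something like `row["group"].iloc[0]`.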


if "login" not in st.session_state:
st.session_state["login"] = {}
if "run_pipeline" not in st.session_state:
st.session_state["run_pipeline"] = False

if OK:
if OK and GROUP != "Select an option":
    if not check_whiteList(username):
        display_restricted_access(username)
    else:
        display()
95 changes: 95 additions & 0 deletions pages/run_pipeline.py
@@ -0,0 +1,95 @@
import streamlit as st

import tabs.tab_command as tt
from pipeline_project_map import map_pipeline_project
from shared.sessionstate import retrieve_all_from_ss, save_in_ss, ss_set
from shared.visual import header


def reset_button_state():
ss_set("button_clicked", False)


header()
st.write("## Running Nextflow pipeline on Alma")
st.write("Select your pipeline and your project, then submit the process")

# pull saved values if set, otherwise set to defaults
ss_values = retrieve_all_from_ss()
OK = ss_values["OK"]
MY_SSH = ss_values["MY_SSH"]
username = ss_values["user_name"]
server = ss_values["server"]
GROUP = ss_values["GROUP"]
GROUPS = ss_values["GROUPS"]
SCRATCH = ss_values["SCRATCH"]
RDS = ss_values["RDS"]
SAMPLE = ss_values["SAMPLE"]
PIPELINE = ss_values["PIPELINE"]
password = ss_values["password"]
PROJECT = ss_values["PROJECT"]
JOB_ID = ss_values["JOB_ID"]
WORKDIR = ss_values["WORK_DIR"]
OUTPUT_DIR = ss_values["OUTPUT_DIR"]
run_pipeline_clicked = ss_values["run_pipeline_clicked"]
button_clicked = ss_values["button_clicked"]

samples = ["all", "demo"] # , "customised"]

# Create the selectbox and update session state,
# adding "select" as the first and default choice
options = ["select"] + list(map_pipeline_project.keys())
index = options.index(PIPELINE)
PIPELINE = st.selectbox("Select a pipeline", options=options, index=index)  # , key="PIPELINE")
if PIPELINE != "select":
    PROJECT = st.selectbox(
        "Select your project",
        options=map_pipeline_project[PIPELINE],
        index=map_pipeline_project[PIPELINE].index(PROJECT) if PROJECT in map_pipeline_project[PIPELINE] else 0,
        on_change=reset_button_state,
    )

SAMPLE = st.selectbox(
    "Select your samples",
    options=samples,
    index=samples.index(SAMPLE) if SAMPLE in samples else 0,
    on_change=reset_button_state,
)

WORK_DIR = st.text_input("Working directory", value=SCRATCH)
OUTPUT_DIR = st.text_input("Output directory", value=SCRATCH)

# passing inputs between tabs
if OK:
    tt.tab(
        username,
        MY_SSH,
        PIPELINE,
        PROJECT,
        selected_samples=SAMPLE,
        work_dir=WORK_DIR,
        output_dir=OUTPUT_DIR,
    )
    save_in_ss(
        {
            "OK": OK,
            "MY_SSH": MY_SSH,
            "user_name": username,
            "server": server,
            "password": password,
            "GROUP": GROUP,
            "GROUPS": GROUPS,
            "SCRATCH": SCRATCH,
            "RDS": RDS,
            "SAMPLE": SAMPLE,
            "PIPELINE": PIPELINE,
            "PROJECT": PROJECT,
            "JOB_ID": JOB_ID,
            "WORKDIR": WORK_DIR,
            "OUTPUT_DIR": OUTPUT_DIR,
            "run_pipeline_clicked": run_pipeline_clicked,
            "button_clicked": button_clicked,
        }
    )
else:
    st.write("#### Log in first to run Nextflow on Alma")
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -1,3 +1,7 @@
[tool.isort]
profile = "black"
filter_files = true

[tool.black]
line-length = 127
extend-exclude = '''
Binary file removed shared/__pycache__/sessionstate.cpython-312.pyc
72 changes: 72 additions & 0 deletions shared/command_helper.py
@@ -0,0 +1,72 @@
import os


def get_path_to_script(selected_pipeline, selected_project, selected="all"):
    NX_SHARED_PATH = "/data/scratch/shared/RSE/NF-project-configurations"
    # e.g., /data/scratch/shared/RSE/NF-project-configurations/epi2me-human-variation/nf-long-reads/scripts
    base_path = os.path.join(NX_SHARED_PATH, selected_pipeline, selected_project, "scripts")

    script_mapping = {
        "all": "launch_samples.sh",
        "demo": "launch_demo.sh",
        "single": "launch_sample_analysis.sh",  # not yet supported
    }

    if selected in script_mapping:
        return os.path.join(base_path, script_mapping[selected])

    raise ValueError(f"Invalid selection '{selected}'. Only 'all' and 'demo' are supported.")


# launch command based on the project
def pipe_cmd(
    username,
    selected_pipeline="",
    selected_project="",
    cmd_num=0,
    selected_samples="all",
    work_dir="work",
    output_dir="output",
):
    def get_pipeline_command():
        """Generate the pipeline execution command based on the sample selection."""
        path_to_script = get_path_to_script(selected_pipeline, selected_project, selected_samples)

        cmd_pipeline = f"""
mkdir -p {work_dir}/logs
cd {work_dir}
"""
        # not sure if this is the best approach - using the job ID for the filename
        log_out = f"{work_dir}/logs/%j.out"
        log_err = f"{work_dir}/logs/%j.err"

        if selected_samples == "demo":
            cmd_pipeline += f"sbatch -o {log_out} -e {log_err} {path_to_script} {work_dir} {output_dir}"
        elif selected_samples == "all":
            cmd_pipeline += f"sbatch -o {log_out} -e {log_err} {path_to_script} --work-dir {work_dir} --outdir {output_dir}"
            # ./your_script.sh --env "/my/custom/env" --work-dir "my_work" --outdir "my_output" --config "my_config" --params "params.json"
            # Usage:
            # bash launch_batch_analysis.sh \
            #     --work-dir "workdir" \
            #     --outdir "output" \
            #     --env "/data/rds/DIT/SCICOM/SCRSE/shared/conda/nextflow_env" \
            #     --params "/data/params/parameters.json" \
            #     --config "custom_config.config"

        return cmd_pipeline.strip()

    # Command mappings
    command_map = {
        0: get_pipeline_command(),
        1: f"squeue -u {username}",
        2: (
            f"sacct --user {username} "
            "--format UID,User,JobID,JobName,Submit,Elapsed,Partition,"
            "NNodes,NCPUS,TotalCPU,CPUTime,ReqMem,MaxRSS,WorkDir,State,"
            "Account,AllocTres -P"
        ),
        3: "echo hello from nextflow-on-Alma app",
    }

    # Return the corresponding command
    return command_map.get(cmd_num, "echo 'Invalid command number'")
56 changes: 56 additions & 0 deletions shared/helpers.py
@@ -0,0 +1,56 @@
from contextlib import contextmanager, redirect_stdout
from io import StringIO

import yaml
from pyalma import SshClient

############################################################################


# Helper function that streams stdout to a streamlit component
@contextmanager
def st_capture(output_func):
    with StringIO() as stdout, redirect_stdout(stdout):
        old_write = stdout.write

        def new_write(string):
            # write to the underlying buffer, then push the whole buffer to the component
            ret = old_write(string)
            output_func(stdout.getvalue())
            return ret

        stdout.write = new_write
        yield
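Typical usage pairs the context manager with a Streamlit placeholder so that anything printed inside the block is mirrored into the app; a minimal sketch:

```python
import streamlit as st
from shared.helpers import st_capture

output = st.empty()
with st_capture(output.code):
    print("this line is streamed into the Streamlit component")
```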


############################################################################
# Validate user with pyalma library
def validate_user(ssh_host, sftp_host, username, password):
    MY_SSH = SshClient(server=ssh_host, sftp=sftp_host, username=username, password=password)
    print("Validating login...")
    cmd_usr = "sacctmgr list association user=$USER format=Account -P | tail -n +2"
    print("Command:\n", cmd_usr)
    results = MY_SSH.run_cmd(cmd_usr)
    if results["err"] is not None:
        print("Errors")
        err_msg = "Connection failed: " + results["err"]
        return False, None, err_msg, []
    else:
        groups = results["output"].strip().split("\n")
        return True, MY_SSH, "Session validated successfully", groups
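The four-tuple return keeps the call site simple; the hostnames below are placeholders, not the real Alma endpoints:

```python
from shared.helpers import validate_user

ok, ssh, msg, groups = validate_user(
    ssh_host="login.alma.example.ac.uk",  # placeholder host
    sftp_host="sftp.alma.example.ac.uk",  # placeholder host
    username="jdoe",
    password="***",
)
if ok:
    print(msg, "- accounts:", groups)
```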


############################################################################
def get_scratch_rds_path(username, group, yaml_file="custom_files/group_path_map.yaml"):
    print(username, group)
    with open(yaml_file, "r") as file:
        config = yaml.safe_load(file)

    group_config = config["group_paths"].get(group, config["group_paths"].get("default"))

    scratch = group_config["scratch"].format(username=username, group=group)
    rds = group_config["rds"].format(username=username)
    print(scratch, rds)
    return scratch, rds
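Combined with the `group_path_map.yaml` added in this PR, the lookup is a one-liner at the call site (same hypothetical user as above):

```python
scratch, rds = get_scratch_rds_path("jdoe", "infotech")
# scratch -> /data/scratch/DCO/DIGOPS/SCIENCOM/jdoe
# rds     -> /data/rds/DIT/SCICOM/SCRSE
```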
38 changes: 36 additions & 2 deletions shared/sessionstate.py
@@ -1,12 +1,46 @@
import os

import streamlit as st
import yaml


def ss_set(key, value):
    st.session_state[key] = value


def ss_get(key, default=""):
def ss_get(key, default="todrop"):
    if key in st.session_state:
        return st.session_state[key]
    else:
        return default
        # we should update it to read from ss_defaults
        return keys_defaults[key]


def save_in_ss(data_dict):
    for key, value in data_dict.items():
        ss_set(key, value)


def load_defaults_from_yaml():
file_path = "ss_defaults.yaml"
file_path = os.path.join(os.path.dirname(__file__), file_path)
try:
with open(file_path, "r") as file:
defaults = yaml.safe_load(file)
return defaults.get("keys_defaults", {})
except Exception as e:
print(f"Error loading YAML: {e}")
return {}


def retrieve_all_from_ss():
    return {key: ss_get(key, default) for key, default in keys_defaults.items()}


def list_all():
    # for key, value in st.session_state.items():
    #     st.write(f"🔹 **{key}**: {value}")
    st.json(st.session_state.to_dict())


keys_defaults = load_defaults_from_yaml()
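`load_defaults_from_yaml` expects a `shared/ss_defaults.yaml` with a top-level `keys_defaults` mapping. That file is not part of this diff, so the sketch below only mirrors the keys that `retrieve_all_from_ss` pulls in `pages/run_pipeline.py`; the default values are guesses (only `"Select an option"` and `"select"` are implied by the code):

```yaml
keys_defaults:
  OK: false
  MY_SSH: null
  user_name: ""
  server: ""
  password: ""
  GROUP: "Select an option"
  GROUPS: []
  SCRATCH: ""
  RDS: ""
  SAMPLE: "all"
  PIPELINE: "select"
  PROJECT: ""
  JOB_ID: ""
  WORK_DIR: ""
  OUTPUT_DIR: ""
  run_pipeline_clicked: false
  button_clicked: false
```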