Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build_package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
matrix:
os: [macos-latest, ubuntu-latest, windows-latest]
python: ["3.9", "3.10"]
python: ["3.11", "3.12"]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish_docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: 3.9
python-version: 3.11

- name: Install pip
run: |
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ jobs:
fail-fast: false
matrix:
include:
- { python-version: "3.10", os: ubuntu-latest, session: "pre-commit" }
- { python-version: "3.10", os: ubuntu-latest, session: "safety" }
- { python-version: "3.10", os: ubuntu-latest, session: "mypy" }
- { python-version: "3.10", os: ubuntu-latest, session: "tests" }
- { python-version: "3.10", os: windows-latest, session: "tests" }
- { python-version: "3.10", os: ubuntu-latest, session: "typeguard" }
- { python-version: "3.10", os: ubuntu-latest, session: "xdoctest" }
- { python-version: "3.10", os: ubuntu-latest, session: "docs-build" }
- { python-version: "3.11", os: ubuntu-latest, session: "pre-commit" }
- { python-version: "3.11", os: ubuntu-latest, session: "safety" }
- { python-version: "3.11", os: ubuntu-latest, session: "mypy" }
- { python-version: "3.11", os: ubuntu-latest, session: "tests" }
- { python-version: "3.11", os: windows-latest, session: "tests" }
- { python-version: "3.11", os: ubuntu-latest, session: "typeguard" }
- { python-version: "3.11", os: ubuntu-latest, session: "xdoctest" }
- { python-version: "3.11", os: ubuntu-latest, session: "docs-build" }

env:
NOXSESSION: ${{ matrix.session }}
Expand Down
29 changes: 0 additions & 29 deletions docs/API.rst

This file was deleted.

8 changes: 3 additions & 5 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@

from jinja2.defaults import DEFAULT_FILTERS

import drevalpy

sys.path.insert(0, os.path.abspath("../"))
sys.path.insert(0, os.path.abspath(".."))


# -- General configuration ---------------------------------------------
Expand Down Expand Up @@ -58,9 +56,9 @@
# the built documents.
#
# The short X.Y version.
version = drevalpy.__version__
version = "1.0.10"
# The full version, including alpha/beta/rc tags.
release = drevalpy.__version__
release = "1.0.10"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
9 changes: 4 additions & 5 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
sphinx-autobuild==2024.10.3 ; python_version >= "3.9" and python_full_version <= "3.13.0"
sphinx-autodoc-typehints==2.3.0 ; python_version >= "3.9" and python_full_version <= "3.13.0"
sphinx-click==6.0.0 ; python_version >= "3.9" and python_full_version <= "3.13.0"
sphinx-rtd-theme==3.0.2 ; python_version >= "3.9" and python_full_version <= "3.13.0"
-e .
sphinx-autobuild==2024.10.3 ; python_version >= "3.11" and python_version < "3.13"
sphinx-autodoc-typehints==2.5.0 ; python_version >= "3.11" and python_version < "3.13"
sphinx-click==6.0.0 ; python_version >= "3.11" and python_version < "3.13"
sphinx-rtd-theme==3.0.2 ; python_version >= "3.11" and python_version < "3.13"
186 changes: 186 additions & 0 deletions drevalpy/datasets/curvecurator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
"""Contains all function required for CurveCurator fitting."""

import subprocess
from pathlib import Path

import numpy as np
import pandas as pd
import toml

from ..pipeline_function import pipeline_function


def _prepare_raw_data(curve_df: pd.DataFrame, output_dir: str | Path):
required_columns = ["dose", "response", "sample", "drug"]
if not all([col in curve_df.columns for col in required_columns]):
raise ValueError("Missing columns in viability data. Required columns are {required_columns}.")
if "replicate" in curve_df.columns:
required_columns.append("replicate")
curve_df = curve_df[required_columns]
n_replicates = 1
conc_columns = ["dose"]
has_multicol_index = False
if "replicate" in curve_df.columns:
n_replicates = curve_df["replicate"].nunique()
conc_columns.append("replicate")
has_multicol_index = True

df = curve_df.pivot(index=["sample", "drug"], columns=conc_columns, values="response")

for i in range(n_replicates):
df.insert(0, (0.0, n_replicates - i), 1.0)

concentrations = df.columns.sort_values()
df = df[concentrations]

experiments = np.arange(df.shape[1])
df.insert(0, "Name", df.index.map(lambda x: f"{x[0]}|{x[1]}"))
df.columns = ["Name"] + [f"Raw {i}" for i in experiments]

curvecurator_folder = Path(output_dir)
curvecurator_folder.mkdir(exist_ok=True, parents=True)
df.to_csv(curvecurator_folder / "curvecurator_input.tsv", sep="\t", index=False)

if has_multicol_index:
doses = [pair[0] for pair in concentrations]
else:
doses = concentrations.to_list()
return len(experiments), doses, n_replicates, len(df)


def _prepare_toml(filename: str, n_exp: int, n_replicates: int, doses: list[float], dataset_name: str, cores: int):
config = {
"Meta": {
"id": filename,
"description": dataset_name,
"condition": "drug",
"treatment_time": "72 h",
},
"Experiment": {
"experiments": range(n_exp),
"doses": doses,
"dose_scale": "1e-06",
"dose_unit": "M",
"control_experiment": [i for i in range(n_replicates)],
"measurement_type": "OTHER",
"data_type": "OTHER",
"search_engine": "OTHER",
"search_engine_version": "0",
},
"Paths": {
"input_file": "curvecurator_input.tsv",
"curves_file": "curves.txt",
"normalization_file": "norm.txt",
"mad_file": "mad.txt",
"dashboard": "dashboard.html",
},
"Processing": {
"available_cores": cores,
"max_missing": max(len(doses) - 5, 0),
"imputation": False,
"normalization": False,
},
"Curve Fit": {
"type": "OLS",
"speed": "exhaustive",
"max_iterations": 1000,
"interpolation": False,
"control_fold_change": True,
},
"F Statistic": {
"optimized_dofs": True,
"alpha": 0.05,
"fc_lim": 0.45,
},
}
return config


def _exec_curvecurator(output_dir: Path):
command = ["CurveCurator", str(output_dir / "config.toml"), "--mad"]
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
process.communicate()


@pipeline_function
def preprocess(input_file: str | Path, output_dir: str | Path, dataset_name: str, cores: int):
    """
    Preprocess raw viability data and create config.toml for use with CurveCurator.

    :param input_file: Path to csv file containing the raw viability data
    :param output_dir: Path to store all the files to, including the preprocessed data, the config.toml
        for CurveCurator, CurveCurator's output files, and the postprocessed data
    :param dataset_name: Name of the dataset
    :param cores: The number of cores to be used for fitting the curves using CurveCurator.
        This parameter is written into the config.toml, but it is min of the number of curves to fit
        and the number given (min(n_curves, cores))
    """
    input_path = Path(input_file)
    out_path = Path(output_dir)
    raw_df = pd.read_csv(input_path)

    n_exp, doses, n_replicates, n_curves = _prepare_raw_data(raw_df, out_path)
    # Never request more cores than there are curves to fit.
    effective_cores = min(n_curves, cores)

    config = _prepare_toml(input_path.name, n_exp, n_replicates, doses, dataset_name, effective_cores)
    with open(out_path / "config.toml", "w") as config_file:
        toml.dump(config, config_file)


@pipeline_function
def postprocess(output_folder: str | Path, dataset_name: str):
    """
    Postprocess CurveCurator output file.

    This function reads the curves.txt file created by CurveCurator, which contains the
    fitted curve parameters and postprocesses it to be used by drevalpy.

    :param output_folder: Path to the output folder of CurveCurator containing the curves.txt file.
    :param dataset_name: The name of the dataset, will be used to prepend the postprocessed <dataset_name>.csv file
    """
    output_folder = Path(output_folder)
    # Maps CurveCurator column names to drevalpy names; the keys also act as usecols.
    required_columns = {
        "Name": "Name",
        "pEC50": "response",
        "pEC50 Error": "pEC50Error",
        "Curve Slope": "Slope",
        "Curve Front": "Front",
        "Curve Back": "Back",
        "Curve Fold Change": "FoldChange",
        "Curve AUC": "AUC",
        "Curve R2": "R2",
        "Curve P_Value": "pValue",
        "Curve Relevance Score": "RelevanceScore",
        "Curve F_Value": "fValue",
        "Curve Log P_Value": "negLog10pValue",
        "Signal Quality": "SignalQuality",
        "Curve RMSE": "RMSE",
        "Curve F_Value SAM Corrected": "fValueSAMCorrected",
        "Curve Regulation": "Regulation",
    }
    fitted_curve_data = pd.read_csv(output_folder / "curves.txt", sep="\t", usecols=required_columns).rename(
        columns=required_columns
    )
    # "Name" encodes "<sample>|<drug>" (see _prepare_raw_data); split it back out.
    fitted_curve_data[["cell_line_id", "drug_id"]] = fitted_curve_data.Name.str.split("|", expand=True)
    fitted_curve_data.to_csv(output_folder / f"{dataset_name}.csv", index=False)


def fit_curves(input_file: str | Path, output_dir: str | Path, dataset_name: str, cores: int):
    """
    Fit curves for provided raw viability data.

    This function reads viability data in a predefined input format, preprocesses the data
    to be readable by CurveCurator, fits curves to the data using CurveCurator, and
    postprocesses the fitted data to a format required by drevalpy.

    :param input_file: Path to the file containing the raw viability data
    :param output_dir: Path to store all the files to, including the preprocessed data, the config.toml
        for CurveCurator, CurveCurator's output files, and the postprocessed data
    :param dataset_name: The name of the dataset, will be used to prepend the postprocessed <dataset_name>.csv file
    :param cores: The number of cores to be used for fitting the curves using CurveCurator.
        This parameter is written into the config.toml, but it is min of the number of curves to fit
        and the number given (min(n_curves, cores))
    """
    out_path = Path(output_dir)
    preprocess(input_file, out_path, dataset_name, cores)
    _exec_curvecurator(out_path)
    postprocess(out_path, dataset_name)
23 changes: 19 additions & 4 deletions drevalpy/datasets/loader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Contains functions to load the GDSC1, GDSC2, CCLE, and Toy datasets."""

import os
from pathlib import Path
from typing import Callable

import pandas as pd
Expand Down Expand Up @@ -92,6 +93,16 @@ def load_toy(path_data: str = "data") -> DrugResponseDataset:
)


def load_custom(path_data: str | Path) -> DrugResponseDataset:
    """
    Load a user-provided custom dataset from a csv file.

    :param path_data: Path to location of custom dataset
    :return: DrugResponseDataset containing response, cell line IDs, and drug IDs
    """
    dataset = DrugResponseDataset.from_csv(path_data)
    return dataset


AVAILABLE_DATASETS: dict[str, Callable] = {
"GDSC1": load_gdsc1,
"GDSC2": load_gdsc2,
Expand All @@ -105,12 +116,16 @@ def load_dataset(dataset_name: str, path_data: str = "data") -> DrugResponseData
"""
Load a dataset based on the dataset name.

:param dataset_name: The name of the dataset to load ('GDSC1', 'GDSC2', 'CCLE', or 'Toy_Data').
:param dataset_name: The name of the dataset to load. Can be one of ('GDSC1', 'GDSC2', 'CCLE', or 'Toy_Data')
to download provided datasets, or any other name, to allow for custom datasets. In that case, the following
file has to exist: <path_data>/<dataset_name>.csv.
:param path_data: The path to the dataset.
:return: A DrugResponseDataset containing response, cell line IDs, drug IDs, and dataset name.
:raises ValueError: If the dataset name is unknown.
:raises FileNotFoundError: If the custom dataset could not be found at the given path.
"""
if dataset_name in AVAILABLE_DATASETS:
return AVAILABLE_DATASETS[dataset_name](path_data) # type: ignore
else:
raise ValueError(f"Unknown dataset name: {dataset_name}")
custom_path = Path(path_data) / dataset_name / f"{dataset_name}.csv"
if custom_path.is_file():
return load_custom(custom_path)
raise FileNotFoundError(f"Custom dataset does not exist at given path: {custom_path}")
8 changes: 4 additions & 4 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
sys.exit(1)

package = "drevalpy"
python_versions = ["3.10", "3.11"]
python_versions = ["3.11", "3.12"]
nox.options.sessions = (
"pre-commit",
"safety",
Expand Down Expand Up @@ -147,7 +147,7 @@ def mypy(session: Session) -> None:
"""
args = session.posargs or ["drevalpy", "tests", "docs/conf.py"]
session.install(".")
session.install("mypy", "pytest", "types-requests", "types-attrs", "types-PyYAML")
session.install("mypy", "pytest", "types-requests", "types-attrs", "types-PyYAML", "types-toml")
session.run("mypy", *args)


Expand All @@ -158,7 +158,7 @@ def tests(session: Session) -> None:

:param session: The Session object.
"""
session.install(".")
session.install(".[fit]")
session.install("coverage[toml]", "pytest", "pygments")
try:
session.run("coverage", "run", "--parallel", "-m", "pytest", *session.posargs)
Expand Down Expand Up @@ -194,7 +194,7 @@ def typeguard(session: Session) -> None:

:param session: The Session object.
"""
session.install(".")
session.install(".[fit]")
session.install("pytest", "typeguard", "pygments")
session.run("pytest", f"--typeguard-packages={package}", *session.posargs)

Expand Down
Loading