pythonhealthdatascience
diff --git a/‎.flake8‎
Lines changed: 7 additions & 1 deletion b/‎.flake8‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎.lintr‎
Lines changed: 27 additions & 1 deletion b/‎.lintr‎
Lines changed: 27 additions & 1 deletion
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 14 additions & 0 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎.pre-commit-hooks/check-no-quarto-r-include.sh‎
Lines changed: 21 additions & 0 deletions b/‎.pre-commit-hooks/check-no-quarto-r-include.sh‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎.pylintrc‎
Lines changed: 10 additions & 1 deletion b/‎.pylintrc‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 18 additions & 0 deletions b/‎README.md‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎environment.yaml‎
Lines changed: 1 addition & 0 deletions b/‎environment.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎lint.sh‎
Lines changed: 10 additions & 8 deletions b/‎lint.sh‎
Lines changed: 10 additions & 8 deletions
diff --git a/‎pages/inputs/input_data.qmd‎
Lines changed: 19 additions & 19 deletions b/‎pages/inputs/input_data.qmd‎
Lines changed: 19 additions & 19 deletions
@@ -1,5 +1,11 @@
 [flake8]
 per-file-ignores =
     docstrings.py: F811
+    outputs.py: F811
+    parallel.py: F401
     parameters_file.py: E402,F811,E0102
-    parameters_validation.py: F821
+    parameters_validation.py: F821
+    replications.py: F401,F811,F821
+    pages/output_analysis/outputs_resources/*.py:E261,E262,F821
+    pages/output_analysis/replications_resources/*.py:E261,E262,F821
+    pages/inputs/parameters_validation_resources/ParamClass.py: C0103
@@ -1,3 +1,29 @@
 linters: all_linters(packages = "lintr", undesirable_function_linter = NULL)
 encoding: "UTF-8"
-exclusions: list("renv")
+exclusions: list(
+    "pages/inputs/parameters_validation.qmd" = list(
+        object_usage_linter = 771:772
+      ),
+    "pages/output_analysis/n_reps.qmd" = list(
+        unused_import_linter = Inf,
+        object_usage_linter = Inf
+      ),
+    "pages/output_analysis/outputs.qmd" = list(
+        one_call_pipe_linter = 898,
+        line_length_linter = 2828
+      ),
+    "pages/output_analysis/parallel.qmd" = list(
+        one_call_pipe_linter = 812
+      ),
+    "pages/output_analysis/outputs_resources/model.R" = list(
+        object_usage_linter = Inf
+      ),
+    "pages/output_analysis/replications.qmd" = list(
+        unused_import_linter = Inf
+      ),
+    "pages/output_analysis/replications_resources" = list(
+        object_usage_linter = Inf
+      ),
+    "pages/style_docs/linting_resources/code.R",
+    "renv"
+  )
@@ -0,0 +1,14 @@
+repos:
+  - repo: local
+    hooks:
+      - id: quarto-r-include-check
+        name: Block R Quarto file includes which break lintr
+        entry: .pre-commit-hooks/check-no-quarto-r-include.sh
+        language: script
+        files: \.qmd$
+  - repo: https://github.com/lorenzwalthert/precommit
+    rev: v0.4.3
+    hooks:
+      - id: lintr
+        args: [--warn_only]
+        verbose: true
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+# Pre-commit check: block staged .qmd files containing '{{< include ... .R >}}'.
+
+ERROR=0
+
+# Read one staged path per line so filenames with spaces are not word-split;
+# --diff-filter=ACMR skips deleted files, which no longer exist on disk.
+while IFS= read -r FILE; do
+  # Detect presence of Quarto include lines
+  if grep -q '{{< *include *.*\.R *>}}' "$FILE"; then
+    echo "ERROR: $FILE contains '{{< include ... .R >}}'."
+    echo "Please use '#| file: filename.R' in code chunk options instead."
+    ERROR=1
+  fi
+done < <(git diff --cached --name-only --diff-filter=ACMR | grep '\.qmd$')
+
+if [ "$ERROR" -eq 1 ]; then
+  echo "Commit blocked: Replace '{{< include ... .R >}}' with Quarto chunk option '#| file: filename.R'."
+  exit 1
+fi
+exit 0
@@ -2,4 +2,13 @@
 max-line-length=79
 
 [MESSAGES CONTROL]
-disable=too-many-lines
+disable =
+    duplicate-code,
+    function-redefined,
+    missing-module-docstring,
+    redefined-outer-name,
+    too-few-public-methods,
+    too-many-arguments,
+    too-many-instance-attributes,
+    too-many-lines,
+    too-many-positional-arguments
@@ -154,6 +154,24 @@ Note: inactive code (i.e. code that does not get run when building the book) wil
 
 <br>
 
+## Pre-commit
+
+To activate the pre-commit hook...
+
+1. Make the bash script executable - from command line, run:
+
+```{.bash}
+chmod +x .pre-commit-hooks/check-no-quarto-r-include.sh
+```
+
+2. Run the following from your python environment on the command line:
+
+```{.bash}
+pre-commit install
+```
+
+<br>
+
 ## Funding
 
 This project is supported by the Medical Research Council [grant number [MR/Z503915/1](https://gtr.ukri.org/projects?ref=MR%2FZ503915%2F1)].
@@ -9,6 +9,7 @@ dependencies:
   - pandas=2.3.1
   - plotly=6.3.0
   - pip
+  - pre-commit=4.3.0
   - pylint=3.3.7
   - pytest=8.4.1
   - python=3.11
 
@@ -6,17 +6,16 @@ print_section() {
     echo "--------------------------------------------------------------------"
 }
 
-# Note: I have used ```{r} #| file: file.R``` instead of
-# ```{r}{{< include file.R >}}```, and likewise for python, as the latter
-# breaks lintr (false positive messages, and missing other messages) and breaks
-# pylint (returns an error Parsing failed: 'invalid syntax'). It doesn't break
+# Note: For R, I have used ```{r} #| file: file.R``` instead of
+# ```{r}{{< include file.R >}}```, as the latter breaks lintr (false positive
+# messages, and missing other messages). It doesn't break
 # if used in non-active code chunks as linters ignore those.
 
 print_section "R" "index.qmd"
 Rscript -e 'lintr::lint("index.qmd")'
 
 print_section "R" "pages/"
-Rscript -e 'lintr::lint_dir("pages", exclusions = list("style_docs/linting_resources/code.R"))'
+Rscript -e 'lintr::lint_dir("pages")'
 
 print_section "R" "tests/"
 Rscript -e 'lintr::lint_dir("tests")'
@@ -26,6 +25,9 @@ echo "--------------------------------------------------------------------"
 print_section "python" "index.qmd and pages/"
 lintquarto -l pylint flake8 -p index.qmd pages/
 
-print_section "python" "tests/"
-pylint pages tests --ignore=linting_resources
-flake8 pages tests --exclude linting_resources
+print_section "python" "pages/ and tests/"
+
+pylint pages tests --ignore=linting_resources,outputs_resources,replications_resources
+pylint pages/output_analysis/outputs_resources pages/output_analysis/replications_resources --disable=missing-module-docstring,undefined-variable
+
+flake8 pages tests --exclude linting_resources,replications_resources
@@ -19,7 +19,7 @@ title: Input data management
 
 :::
 
-## 🧾 Input data
+## Input data
 
 When managing input data in your RAP, there are three key files:
 
@@ -29,7 +29,7 @@ When managing input data in your RAP, there are three key files:
 
 ![](input_data_resources/input_files.png)
 
-## 📦 What is included in a RAP?
+## What is included in a RAP?
 
 Your reproducible analytical pipeline (RAP) should begin with the **earliest data you access**. This could be:
 
@@ -42,7 +42,7 @@ Keep in mind that, especially in sensitive areas like healthcare, you may not be
 
 > **Why is this important?** By starting at the source, you make your work transparent and easy to repeat. For instance, if new raw data becomes available, it's important you have your input modelling code so that you can check your distributions are still appropriate, re-estimate your model parameters, and re-run your analysis.
 
-## 🗃️ Raw data
+## Raw data
 
 This is data which reflects the system you will be simulating. It is used to estimate parameters and fit distributions for your simulation model. For example:
 
@@ -57,11 +57,11 @@ This is data which reflects system you will be simulating. It is used to estimat
 
 :::
 
-### 📋 Checklist: Managing your raw data
+### Checklist: Managing your raw data
 
 :::{.cream}
 
-🗂️ **Always**
+**Always**
 
 * **Keep copies of your raw data**<br>Or, if you can't export it, document how to access it (e.g. database location, required permissions).
 
@@ -71,7 +71,7 @@ This is data which reflects system you will be simulating. It is used to estimat
 
 <br>
 
-🔓 **If you can share the data:**
+**If you can share the data:**
 
 * **Make the data openly available**<br>Follow the [FAIR principles](https://open-science-training-handbook.github.io/Open-Science-Training-Handbook_EN/02OpenScienceBasics/02OpenResearchDataAndMaterials.html): Findable, Accessible, Interoperable, Reusable.
 
@@ -83,7 +83,7 @@ This is data which reflects system you will be simulating. It is used to estimat
 
 <br>
 
-🔒 **If you cannot share the data:**
+**If you cannot share the data:**
 
 * **Describe the dataset**<br>Include details in your documentation.
 
@@ -141,23 +141,23 @@ Some recommendations for generalist repositories are available:
 
 Instructions for Zenodo archiving are provided on our [sharing and archiving](../sharing/archive.qmd) page.
 
-## 📜 Input modelling code
+## Input modelling code
 
 [Input modelling code](input_modelling.qmd#input-modelling) refers to the scripts used to define and fit the statistical distributions that represent the uncertain inputs for a simulation model.
 
 These scripts are often not shared, but are an essential part of your simulation RAP. Sharing them ensures transparency in how distributions were chosen and allows you (or others) to re-run the process if new data or assumptions arise.
 
-### 📋 Checklist: Managing your input modelling code
+### Checklist: Managing your input modelling code
 
 :::{.cream}
 
-🔓 **If you can share the code:**
+**If you can share the code:**
 
 * **Include the input modelling code in your repository**<br>Store it alongside your simulation code and other relevant scripts.
 
 <br>
 
-🔒 **If you cannot share the code:**
+**If you cannot share the code:**
 
 * **For internal use:**
   * Store the code securely and ensure it is accessible to your team or organisation - avoid saving it only on a personal device.
@@ -168,15 +168,15 @@ These scripts are often not shared, but are an essential part of your simulation
 
 :::
 
-## ⚙️ Parameters
+## Parameters
 
 Parameters are the numerical values used in your model, like the arrival rates, service times or probabilities.
 
-### 📋 Checklist: Managing your parameters
+### Checklist: Managing your parameters
 
 :::{.cream}
 
-🗂️ **Always**
+**Always**
 
 * **Keep a structured parameter file**<br>Store all model parameters in a clearly structured format like a [CSV file](parameters_file.qmd) or a [script](parameters_script.qmd).
 
@@ -186,15 +186,15 @@ Parameters are the numerical values used in your model, like the arrival rates,
 
 <br>
 
-🔓 **If you can share the parameters:**
+**If you can share the parameters:**
 
 * **Include parameter files in your repository**<br>Store parameter files alongside your model code and documentation.
 
 <br>
 
 You must share some parameters with your model so that it is possible for others to run it. Parameters are often less sensitive than raw data, so sharing is usually possible. However-
 
-🔒 **If you cannot share the parameters:**
+**If you cannot share the parameters:**
 
 * **Provide synthetic parameters**<br>Supply artificial values for each parameter, clearly labelled as synthetic.
 
@@ -204,7 +204,7 @@ You must share some parameters with your model so that it is possible for others
 
 :::
 
-## 🔐 Maintaining a private and public version of your model
+## Maintaining a private and public version of your model
 
 It's common to have data and/or code that cannot be shared publicly. **Both your private and public components should be [version controlled](../setup/version.qmd)**, but you cannot split a single GitHub repository into public and private sections. The suggested solution is to use two separate repositories: **one public, one private**.
 
@@ -235,7 +235,7 @@ The way you might set these up depends on whether you are allowed to share the r
   3. Use the shared simulation package in both repositories.
   4. Run and share the full workflow in public with synthetic parameters; run the actual analysis in private with the real parameters.
 
-## 🧪 Test yourself
+## Test yourself
 
 ```{r}
 #| echo: false
@@ -293,7 +293,7 @@ cat(longmcq(c(
 
 :::
 
-## 📎 Further information
+## Further information
 
 * ["How to Make a Data Dictionary"](https://help.osf.io/article/217-how-to-make-a-data-dictionary) from OSF Support.