Skip to content

Commit 43ea964

Browse files
authored
Reproducibility for DOI 10.5281/zenodo.6853185 (#9)
* Bug fixes and template logic changes * DOI 10.5281/zenodo.6853185
1 parent a1113bb commit 43ea964

File tree

11 files changed

+342
-11
lines changed

11 files changed

+342
-11
lines changed

10-5281_zenodo-6853067/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Set global arguments
2-
ARG JAMMIES_VER=0.4.3
2+
ARG JAMMIES_VER=0.4.5
33

44
# Get and patch project for working directory
55
FROM python:3.11.2-alpine3.17 as projects
@@ -22,7 +22,7 @@ RUN apk add git
2222

2323
## Install jammies and run
2424
RUN python3 -m pip install "jammies[all]==${JAMMIES_VER}"
25-
RUN jammies patch src
25+
RUN jammies patch src -y
2626

2727
# Setup project specific info
2828
FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04

10-5281_zenodo-6853067/project_metadata.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@
7979
},
8080
"https://github.com/bpaassen/sparfae": {
8181
"name": "#github",
82-
"license": "#gpl3later",
8382

8483
"tags": [
8584
{
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
**/clean
2+
**/env
3+
**/src
4+
**/.dockerignore
5+
**/Dockerfile*
6+
**/README.md
7+
**/instructions.md
8+
**/issues.md

10-5281_zenodo-6853185/.gitignore

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Ignore clean and src directories
2+
/clean
3+
/src
4+
5+
# Ignore environments
6+
/env
7+
8+
# Ignore IDEs
9+
.vscode
10+
11+
# Ignore caches
12+
__pycache__
13+
.jammies.toml

10-5281_zenodo-6853185/Dockerfile

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Set global arguments
2+
ARG JAMMIES_VER=0.4.5
3+
4+
# Get and patch project for working directory
5+
FROM python:3.11.2-alpine3.17 as projects
6+
7+
## Set local arguments
8+
ARG JAMMIES_VER
9+
10+
## Keeps Python from generating .pyc files in the container
11+
ENV PYTHONDONTWRITEBYTECODE=1
12+
13+
## Turns off buffering for easier container logging
14+
ENV PYTHONUNBUFFERED=1
15+
16+
## Copy files to directory
17+
COPY . ./
18+
19+
## Add git to alpine to pull necessary repositories
20+
RUN apk update
21+
RUN apk add git
22+
23+
## Install jammies and run
24+
RUN python3 -m pip install "jammies[all]==${JAMMIES_VER}"
25+
RUN jammies patch src -y
26+
27+
# Setup Java runtime via jlink
28+
FROM eclipse-temurin:17.0.8_7-jdk-jammy as java
29+
30+
# Create custom runtime
31+
RUN $JAVA_HOME/bin/jlink \
32+
--add-modules ALL-MODULE-PATH \
33+
--strip-debug \
34+
--no-man-pages \
35+
--no-header-files \
36+
--compress=2 \
37+
--output /javaruntime
38+
39+
# Setup project specific info
40+
FROM python:3.11.4-bookworm
41+
42+
## Keeps Python from generating .pyc files in the container
43+
ENV PYTHONDONTWRITEBYTECODE=1
44+
45+
## Turns off buffering for easier container logging
46+
ENV PYTHONUNBUFFERED=1
47+
48+
## Copy Java runtime over
49+
ENV JAVA_HOME=/opt/java/openjdk
50+
ENV PATH $JAVA_HOME/bin:$PATH
51+
COPY --from=java /javaruntime $JAVA_HOME
52+
53+
## Copy project files from previous stage here
54+
RUN mkdir /src
55+
COPY --from=projects /src /src
56+
WORKDIR /src
57+
58+
## Setup python
59+
RUN python3 -m pip install .
60+
61+
## Setup script run
62+
CMD [ "python3", "./helper_code/models/regression/train_rf_regression_full_cv.py", "--training-data-filepath", "./VLE_datasets/v1/VLE_12k_dataset_v1.csv", "--output-dir", "./results" ]

10-5281_zenodo-6853185/README.md

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# [Can Population-based Engagement Improve Personalisation? A Novel Dataset and Experiment](https://doi.org/10.5281/zenodo.6853185)
2+
3+
![Not at All Reproducible](https://img.shields.io/badge/Status-Not%20at%20All%20Reproducible-red)
4+
5+
This is a project constructor for the paper [*Can Population-based Engagement Improve Personalisation? A Novel Dataset and Experiment*](https://doi.org/10.5281/zenodo.6853185) by Sahan Bulathwela, Meghana Verma, [María Pérez-Ortiz](https://orcid.org/0000-0003-1302-6093), [Emine Yilmaz](https://orcid.org/0000-0003-4734-4532), [John Shawe-Taylor](https://orcid.org/0000-0002-2030-0073).
6+
7+
### Associated Metadata
8+
9+
#### Tested Systems
10+
11+
![Debian: bullseye (11) | bookworm (12)](https://img.shields.io/badge/Debian-bullseye%20%2811%29%20%7C%20bookworm%20%2812%29-informational)
12+
![Docker NVIDIA: 20.10 | 23.0](https://img.shields.io/badge/Docker%20NVIDIA-20.10%20%7C%2023.0-informational)
13+
14+
#### Languages
15+
![java: 17.0.8](https://img.shields.io/badge/java-17.0.8-informational)
16+
![Python: 3.11.2 | 3.11.4](https://img.shields.io/badge/Python-3.11.2%20%7C%203.11.4-informational)
17+
18+
#### Resources
19+
20+
* [Can Population-based Engagement Improve Personalisation? A Novel Dataset and Experiment](https://doi.org/10.5281/zenodo.6853185) (Public)
21+
* Contains paper under [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/)
22+
* [GitHub](https://github.com/sahanbull/VLE-Dataset) (Public)
23+
* Contains data under ARR
24+
* Contains materials under ARR
25+
26+
## Project Files
27+
28+
The constructor downloads the following files:
29+
* [Cloned GitHub](https://github.com/ahaim5357/VLE-Dataset) under ARR
30+
31+
## Setup Instructions
32+
33+
### Method 1: Docker
34+
35+
This project contains the necessary files needed to set up a [docker container][docker]. Make sure you have Docker installed before attempting anything below.
36+
37+
To build the docker container, navigate to this directory and run the following command:
38+
39+
```sh
40+
docker build -t <image_name> .
41+
```
42+
43+
`image_name` should be replaced with whatever name you would like to refer to the docker container as. It will take around 30 minutes to an hour to build the image.
44+
45+
From there, you can load into the terminal via:
46+
47+
```sh
48+
docker run --rm -itv <local_directory>:/volume <image_name> sh
49+
```
50+
51+
A `volume` directory will be created within the image which will link to the `local_directory` specified. You can specify the current directory of execution via `${PWD}`.
52+
53+
> We are loading into the terminal instead of into Python to copy any generated figures onto the local machine as they cannot otherwise be easily viewed.
54+
55+
Once in the docker terminal, you can run the Python script via:
56+
57+
```sh
58+
python3 ./helper_code/models/regression/train_rf_regression_full_cv.py --training-data-filepath VLE_datasets/v1/VLE_12k_dataset_v1.csv --output-dir ./results
59+
```
60+
61+
You can look through the terminal output and compare the numbers within the paper. To view the figures on the local machine, you can copy them to the volume via:
62+
63+
```sh
64+
cp -R ./results /volume
65+
```
66+
67+
### Method 2: Local Setup
68+
69+
This project uses the Python package `jammies[all]` to set up and fix any issues in the codebase. For instructions on how to download and generate the project from this directory, see the [`jammies`][jammies] repository.
70+
71+
You will also need a version of [Java][java] to run Spark, as consumed by the codebase. Any version of Java 8+ will work, though this setup guide recommends using the latest LTS, which is 17 as of the writing of this guide.
72+
73+
Spark also takes advantage of [Apache Hadoop][hadoop], but this is not necessary to run the codebase, nor does it affect the outcomes, so it will not be used in this guide.
74+
75+
The following instructions have been reproduced using [Python][python] 3.11.4. This project does not make any guarantees that this will work outside of the specified version. Make sure you have Python, along with gcc for Cython, before attempting anything below.
76+
77+
First, you will need to navigate to the generated `src` directory. You will need to install the required dependencies into the global Python instance or a virtual environment via:
78+
79+
```sh
80+
python3 -m pip install .
81+
```
82+
83+
> `python3` is replaced with `py` on Windows machines. Additionally, the `python3 -m` prefix is unnecessary if `pip` is properly added to the path.
84+
85+
After installing the required dependencies, run the Python script via:
86+
87+
```sh
88+
python3 ./helper_code/models/regression/train_rf_regression_full_cv.py --training-data-filepath VLE_datasets/v1/VLE_12k_dataset_v1.csv --output-dir ./results
89+
```
90+
91+
You can look through the `results` directory and compare the numbers within the paper.
92+
93+
[docker]: https://www.docker.com/
94+
[jammies]: https://github.com/ahaim5357/jammies
95+
[java]: https://adoptium.net/temurin/releases/?version=17
96+
[hadoop]: http://apache.github.io/hadoop/
97+
[python]: https://www.python.org/
98+
99+
## Issues
100+
101+
None of the results generated match anything reported in the paper. The `results.csv` generated reports the RMSE, but not for the 12k results, so while the code may work, no direct correlation can be interpreted from the results in the paper.
102+
103+
As such, no consistent results are reported in the paper.
104+
105+
*[ARR]: All Rights Reserved
106+
*[Cloned GitHub]: Cloned GitHub Repository
107+
*[GitHub]: GitHub Repository
108+
*[CC-BY-4.0]: Creative Commons Attribution 4.0 International
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
## Setup Instructions
2+
3+
### Method 1: Docker
4+
5+
This project contains the necessary files needed to set up a [docker container][docker]. Make sure you have Docker installed before attempting anything below.
6+
7+
To build the docker container, navigate to this directory and run the following command:
8+
9+
```sh
10+
docker build -t <image_name> .
11+
```
12+
13+
`image_name` should be replaced with whatever name you would like to refer to the docker container as. It will take around 30 minutes to an hour to build the image.
14+
15+
From there, you can load into the terminal via:
16+
17+
```sh
18+
docker run --rm -itv <local_directory>:/volume <image_name> sh
19+
```
20+
21+
A `volume` directory will be created within the image which will link to the `local_directory` specified. You can specify the current directory of execution via `${PWD}`.
22+
23+
> We are loading into the terminal instead of into Python to copy any generated figures onto the local machine as they cannot otherwise be easily viewed.
24+
25+
Once in the docker terminal, you can run the Python script via:
26+
27+
```sh
28+
python3 ./helper_code/models/regression/train_rf_regression_full_cv.py --training-data-filepath VLE_datasets/v1/VLE_12k_dataset_v1.csv --output-dir ./results
29+
```
30+
31+
You can look through the terminal output and compare the numbers within the paper. To view the figures on the local machine, you can copy them to the volume via:
32+
33+
```sh
34+
cp -R ./results /volume
35+
```
36+
37+
### Method 2: Local Setup
38+
39+
This project uses the Python package `jammies[all]` to set up and fix any issues in the codebase. For instructions on how to download and generate the project from this directory, see the [`jammies`][jammies] repository.
40+
41+
You will also need a version of [Java][java] to run Spark, as consumed by the codebase. Any version of Java 8+ will work, though this setup guide recommends using the latest LTS, which is 17 as of the writing of this guide.
42+
43+
Spark also takes advantage of [Apache Hadoop][hadoop], but this is not necessary to run the codebase, nor does it affect the outcomes, so it will not be used in this guide.
44+
45+
The following instructions have been reproduced using [Python][python] 3.11.4. This project does not make any guarantees that this will work outside of the specified version. Make sure you have Python, along with gcc for Cython, before attempting anything below.
46+
47+
First, you will need to navigate to the generated `src` directory. You will need to install the required dependencies into the global Python instance or a virtual environment via:
48+
49+
```sh
50+
python3 -m pip install .
51+
```
52+
53+
> `python3` is replaced with `py` on Windows machines. Additionally, the `python3 -m` prefix is unnecessary if `pip` is properly added to the path.
54+
55+
After installing the required dependencies, run the Python script via:
56+
57+
```sh
58+
python3 ./helper_code/models/regression/train_rf_regression_full_cv.py --training-data-filepath VLE_datasets/v1/VLE_12k_dataset_v1.csv --output-dir ./results
59+
```
60+
61+
You can look through the `results` directory and compare the numbers within the paper.
62+
63+
[docker]: https://www.docker.com/
64+
[jammies]: https://github.com/ahaim5357/jammies
65+
[java]: https://adoptium.net/temurin/releases/?version=17
66+
[hadoop]: http://apache.github.io/hadoop/
67+
[python]: https://www.python.org/

10-5281_zenodo-6853185/issues.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
## Issues
2+
3+
None of the results generated match anything reported in the paper. The `results.csv` generated reports the RMSE, but not for the 12k results, so while the code may work, no direct correlation can be interpreted from the results in the paper.
4+
5+
As such, no consistent results are reported in the paper.
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
{
2+
"files": [
3+
{
4+
"type": "git",
5+
"name": "Github",
6+
"repository": "https://github.com/ahaim5357/VLE-Dataset.git",
7+
"commit": "6f992d8f5ac4837cc364bfd7421a983976306e89",
8+
"extra": {
9+
"name": "#github_cloned",
10+
"link": "https://github.com/ahaim5357/VLE-Dataset"
11+
}
12+
}
13+
],
14+
"extra": {
15+
"schema_version": 1,
16+
"status": 1,
17+
"systems": {
18+
"_": [
19+
"debian-11",
20+
"debian-12",
21+
"docker-20.10-nvidia",
22+
"docker-23.0-nvidia"
23+
]
24+
},
25+
"languages": {
26+
"python": [
27+
"3.11.2",
28+
"3.11.4"
29+
],
30+
"java": [
31+
"17.0.8"
32+
]
33+
},
34+
"authors": [
35+
"Sahan Bulathwela",
36+
"Meghana Verma",
37+
"https://orcid.org/0000-0003-1302-6093",
38+
"https://orcid.org/0000-0003-4734-4532",
39+
"https://orcid.org/0000-0002-2030-0073"
40+
],
41+
"groups": [
42+
"conference",
43+
"short_paper",
44+
"edm",
45+
"2022"
46+
],
47+
"links": {
48+
"https://doi.org/10.5281/zenodo.6853185": {
49+
"name": "Can Population-based Engagement Improve Personalisation? A Novel Dataset and Experiment",
50+
51+
"tags": [
52+
{
53+
"value": "paper",
54+
"license": "cc4"
55+
}
56+
]
57+
},
58+
"https://github.com/sahanbull/VLE-Dataset": {
59+
"name": "#github",
60+
61+
"tags": [
62+
"data",
63+
"materials"
64+
]
65+
}
66+
}
67+
}
68+
}

templates/python.Dockerfile

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Set global arguments
2-
ARG JAMMIES_VER=0.4.3
2+
ARG JAMMIES_VER=0.4.5
33

44
# Get and patch project for working directory
55
FROM python:3.11.2-alpine3.17 as projects
@@ -22,9 +22,9 @@ RUN apk add git
2222

2323
## Install jammies and run
2424
RUN python3 -m pip install "jammies[all]==${JAMMIES_VER}"
25-
RUN jammies patch src
25+
RUN jammies patch src -y
2626

27-
## Setup project specific info
27+
# Setup project specific info
2828
FROM python:3.11.4-bookworm
2929

3030
## Keeps Python from generating .pyc files in the container
@@ -36,9 +36,10 @@ ENV PYTHONUNBUFFERED=1
3636
## Copy project files from previous stage here
3737
RUN mkdir /src
3838
COPY --from=projects /src /src
39+
WORKDIR /src
3940

4041
## Setup python
41-
RUN python3 -m pip install -r /src/requirements.txt
42+
RUN python3 -m pip install -r requirements.txt
4243

4344
## Setup script run
4445
CMD [ "python3", "<file_name>" ]

0 commit comments

Comments
 (0)