Skip to content

Commit 7b78ef1

Browse files
authored
Merge pull request #40 from ubc-provenance/dev
Fix docker permission error + simplified install
2 parents 8d98c18 + 4f3ca4c commit 7b78ef1

File tree

6 files changed

+120
-75
lines changed

6 files changed

+120
-75
lines changed

.devcontainer/devcontainer.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
"service": "pids",
55
"workspaceFolder": "/home/pids",
66
"forwardPorts": ["${env:DOCKER_PORT}"],
7-
"remoteUser": "${env:USER_NAME}",
7+
"remoteUser": "pids",
88
"customizations": {
99
"vscode": {
1010
"extensions": [
1111
"ms-python.python",
1212
"ms-vscode-remote.remote-containers",
1313
"nvidia.nsight-vscode-edition", // For CUDA/GPU debugging
1414
"eamodio.gitlens",
15-
"ms-toolsai.jupyter"
15+
"ms-toolsai.jupyter",
16+
"anthropic.claude-code"
1617
]
1718
}
1819
},

.env.local

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,3 @@ INPUT_DIR=./data
22
ARTIFACTS_DIR=./artifacts
33
DOCKER_PORT=8888
44
COMPOSE_PROJECT_NAME=pidsmaker
5-
HOST_UID=1000
6-
HOST_GID=1000
7-
USER_NAME=user

Dockerfile

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,13 @@ RUN apt update && \
1515
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
1616
ENV PATH=$JAVA_HOME/bin:$PATH
1717

18-
# installing sudo
19-
RUN apt-get update && apt-get install -y sudo git
18+
# installing sudo, git, gosu (for privilege dropping in entrypoint)
19+
RUN apt-get update && apt-get install -y sudo git && \
20+
set -eux; \
21+
wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/1.17/gosu-amd64"; \
22+
chmod +x /usr/local/bin/gosu; \
23+
gosu --version; \
24+
gosu nobody true
2025

2126
# installing node for Claude Code
2227
RUN apt-get update \
@@ -32,17 +37,19 @@ RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-1-Linux-x86_64.sh
3237
RUN bash Anaconda3-2023.03-1-Linux-x86_64.sh -b -p /opt/conda
3338
RUN rm Anaconda3-2023.03-1-Linux-x86_64.sh
3439

35-
ARG USER_ID
36-
ARG GROUP_ID
37-
ARG USER_NAME
38-
RUN groupadd -g ${GROUP_ID} ${USER_NAME} && useradd -u ${USER_ID} -g ${GROUP_ID} -m -s /bin/bash ${USER_NAME}
40+
# Create a fixed container user. The UID/GID will be remapped at runtime
41+
# by entrypoint.sh to match the host user — no build args required.
42+
# Home is kept separate from WORKDIR (/home/pids) to avoid conflicts with
43+
# the bind-mounted project directory.
44+
RUN groupadd -g 1000 pids && \
45+
useradd -u 1000 -g 1000 -m -d /home/user -s /bin/bash pids
3946
WORKDIR /home/pids
4047

4148
ENV PATH="/opt/conda/bin:$PATH"
4249
ENV PATH="/opt/conda/envs/pids/bin:$PATH"
4350
RUN conda create -n pids python=3.9 && \
44-
echo "source /opt/conda/etc/profile.d/conda.sh" >> /home/${USER_NAME}/.bashrc && \
45-
echo "conda activate pids" >> /home/${USER_NAME}/.bashrc
51+
echo "source /opt/conda/etc/profile.d/conda.sh" >> /home/user/.bashrc && \
52+
echo "conda activate pids" >> /home/user/.bashrc
4653
# https://pythonspeed.com/articles/activate-conda-dockerfile/
4754
SHELL ["conda", "run", "-n", "pids", "/bin/bash", "-c"]
4855
# Activate the environment and install dependencies
@@ -69,10 +76,18 @@ RUN pip install pytest==8.3.5 pytest-cov==6.1.1 pre-commit==4.2.0 setuptools==61
6976

7077
COPY . .
7178

72-
# COPY is done by the docker daemon as root, so we need to chown
73-
RUN chown -R ${USER_NAME}:${USER_NAME} /home
74-
USER ${USER_NAME}
75-
79+
# COPY is done by the docker daemon as root, so we need to chown.
80+
# Only chown the project dir and the user home — not /home/artifacts
81+
# which is a runtime volume managed by entrypoint.sh.
82+
RUN chown -R pids:pids /home/pids /home/user
83+
USER pids
7684

7785
RUN [ -f pyproject.toml ] && pip install -e . || echo "No pyproject.toml found, skipping install"
7886
RUN [ -f .pre-commit-config.yaml ] && pre-commit install || echo "No pre-commit found, skipping install"
87+
88+
# Switch back to root so the entrypoint can remap UID/GID and fix permissions
89+
USER root
90+
COPY entrypoint.sh /entrypoint.sh
91+
RUN chmod +x /entrypoint.sh
92+
ENTRYPOINT ["/entrypoint.sh"]
93+
CMD ["bash"]

compose-pidsmaker.yml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,6 @@ services:
22
pids:
33
build:
44
context: .
5-
args:
6-
USER_ID: ${HOST_UID}
7-
GROUP_ID: ${HOST_GID}
8-
USER_NAME: ${USER_NAME}
95
container_name: ${COMPOSE_PROJECT_NAME}-pids
106
networks:
117
- shared_network
@@ -18,9 +14,8 @@ services:
1814
DB_PASSWORD: postgres
1915
volumes:
2016
- ./:/home/pids
21-
- ${ARTIFACTS_DIR:-/artifacts}:/home/artifacts
17+
- ${ARTIFACTS_DIR:-/home/artifacts}:/home/artifacts
2218
# - /path/to/raw/data:/data
23-
entrypoint: bash
2419
stdin_open: true
2520
tty: true
2621
deploy:

docs/docs/ten-minute-install.md

Lines changed: 56 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -83,65 +83,69 @@ sudo systemctl restart docker
8383
## Load databases
8484
We create two containers: one runs the postgres database, and the other runs the Python environment and the pipeline.
8585
86-
1. Set your paths in .env
87-
```sh
88-
cd ..
89-
cp .env.local .env
90-
```
91-
92-
- In `.env`, set `INPUT_DIR` to the `data` folder path. Optionally, set `ARTIFACTS_DIR` to the folder where all generated files will go (multiple GBs).
93-
94-
- Run the following command and set accordingly `HOST_UID`, `HOST_GID` and `USER_NAME` in `.env`.
95-
```sh
96-
echo "HOST_UID=$(id -u)" && echo "HOST_GID=$(id -g)" && echo "USER_NAME=$(whoami)"
97-
```
98-
99-
- Then run:
100-
```sh
101-
source .env
102-
```
103-
104-
- Then create the output artifacts folder if it doesn't exist yet and ensure it is owned by your user.
105-
```sh
106-
mkdir ${ARTIFACTS_DIR} || chown ${USER_NAME} -R ${ARTIFACTS_DIR}
107-
```
108-
109-
2. Build and start the database container up:
110-
```sh
111-
docker compose -p postgres -f compose-postgres.yml up -d --build
112-
```
113-
Note: each time you modify variables in `.env`, update env variables using `source .env` prior to running `docker compose`.
86+
### 1. Set your paths in .env
87+
88+
```sh
89+
cp .env.local .env
90+
```
91+
92+
In `.env`, set `INPUT_DIR` to the `data` folder path. Optionally, set `ARTIFACTS_DIR` to a path where all generated files will go (multiple GBs).
93+
94+
95+
### 2. Build and start the database container
96+
97+
```sh
98+
docker compose -p postgres -f compose-postgres.yml up -d --build
99+
```
100+
Note: each time you modify variables in `.env`, update env variables using `source .env` prior to running `docker compose`.
114101
115-
3. In a terminal, get a shell into this container:
116-
```sh
117-
docker compose -p postgres exec postgres bash
118-
```
119-
4. If you have enough space to uncompress all datasets locally (135 GB), run this script to load all databases:
120-
```sh
121-
./scripts/load_dumps.sh
122-
```
123-
If you have limited space and want to load databases one by one, do:
124-
```sh
125-
pg_restore -U postgres -h localhost -p 5432 -d DATASET /data/DATASET.dump
126-
```
127-
!!! note
128-
If you want to parse raw data and create database from scratch, please follow the [guideline](./create-db-from-scratch.md) instead of running the above two commands.
129-
6. Once databases are loaded, we won't need to touch this container anymore:
130-
```sh
131-
exit
132-
```
102+
### 3. Get a shell into the postgres container
103+
104+
```sh
105+
docker compose -p postgres exec postgres bash
106+
```
107+
108+
### 4. Load database dumps
109+
110+
If you have enough space to uncompress all datasets you have downloaded locally in the `data` folder, run this script:
111+
112+
```sh
113+
./scripts/load_dumps.sh
114+
```
115+
116+
If you have limited space and want to load databases one by one, do:
117+
118+
```sh
119+
pg_restore -U postgres -h localhost -p 5432 -d DATASET /data/DATASET.dump
120+
```
121+
122+
!!! note
123+
If you want to parse the raw data and create the databases from scratch, please follow the [guideline](./create-db-from-scratch.md) instead of running the two commands above.
124+
125+
Once databases are loaded, we won't need to touch this container anymore:
126+
127+
```sh
128+
exit
129+
```
133130
134131
## Get into the PIDSMaker container
135132
136133
It is within the `pids` container that coding and experiments take place.
137134
138-
1. For VSCode users, we recommend using the [dev container](https://code.visualstudio.com/docs/devcontainers/create-dev-container) extension to directly open VSCode in the container. To do so, simply install the extension, then ctrl+shift+P and <i>Dev Containers: Open Folder in Container</i>.
135+
### 1. VSCode Devcontainer approach
139136
140-
2. The other alternative is to load the container manually and open a shell directly in your terminal.
141-
```sh
142-
docker compose -f compose-pidsmaker.yml up -d --build
143-
docker compose exec pids bash
144-
```
137+
138+
For VSCode users, we recommend using the [dev container](https://code.visualstudio.com/docs/devcontainers/create-dev-container) extension to open VSCode directly in the container. To do so, install the extension, then press Ctrl+Shift+P and run <i>Dev Containers: Open Folder in Container</i>.
139+
140+
141+
### 2. Manual approach
142+
143+
Alternatively, start the container manually and open a shell in it directly from your terminal.
144+
145+
```sh
146+
docker compose -f compose-pidsmaker.yml up -d --build
147+
docker compose exec pids bash
148+
```
145149
146150
It's in this container that the python env is installed and where the framework will be used.
147151

entrypoint.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
2+
set -e
3+
4+
CONTAINER_USER="pids"
5+
CONTAINER_HOME="/home/user"
6+
7+
# Detect the host user's UID/GID from the bind-mounted project directory.
8+
# Bind mounts preserve the host file ownership, so stat gives us the host UID/GID
9+
# without needing any environment variables.
10+
TARGET_UID=$(stat /home/pids -c '%u' 2>/dev/null || echo "1000")
11+
TARGET_GID=$(stat /home/pids -c '%g' 2>/dev/null || echo "1000")
12+
13+
# Safety guard: never remap to root
14+
[ "$TARGET_UID" = "0" ] && TARGET_UID=1000
15+
[ "$TARGET_GID" = "0" ] && TARGET_GID=1000
16+
17+
CURRENT_UID=$(id -u "$CONTAINER_USER")
18+
CURRENT_GID=$(id -g "$CONTAINER_USER")
19+
20+
if [ "$TARGET_UID" != "$CURRENT_UID" ] || [ "$TARGET_GID" != "$CURRENT_GID" ]; then
21+
groupmod -g "$TARGET_GID" "$CONTAINER_USER" 2>/dev/null || true
22+
usermod -u "$TARGET_UID" -g "$TARGET_GID" "$CONTAINER_USER" 2>/dev/null || true
23+
# Re-own the user's home directory (not the bind-mounted project dir)
24+
chown -R "$TARGET_UID:$TARGET_GID" "$CONTAINER_HOME" 2>/dev/null || true
25+
fi
26+
27+
# Ensure the artifacts directory exists and is writable by the container user.
28+
# This handles both the case where Docker created it as root and the case
29+
# where it doesn't exist yet on the host.
30+
mkdir -p /home/artifacts
31+
chown "$TARGET_UID:$TARGET_GID" /home/artifacts
32+
33+
exec gosu "$CONTAINER_USER" "$@"

0 commit comments

Comments
 (0)