Skip to content

Commit bbc288b

Browse files
authored
big new image restructure (#7)
1 parent 26a2f4d commit bbc288b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+5218
-253
lines changed

.github/workflows/build.yml

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -28,30 +28,3 @@ jobs:
2828
VALIDATE_BASH_EXEC: true
2929
VALIDATE_DOCKERFILE_HADOLINT: true
3030
VALIDATE_YAML: true
31-
32-
# build_python_kernel:
33-
# permissions:
34-
# id-token: write
35-
# contents: read
36-
# packages: write
37-
# actions: write
38-
# uses: ./.github/workflows/reusable-docker-build.yml
39-
# strategy:
40-
# matrix:
41-
# # Must be a supported version by jupyter/datascience-notebook
42-
# # https://hub.docker.com/r/jupyter/datascience-notebook/tags?page=1&name=python-
43-
# version: [ "3.9.13", "3.8.13" ]
44-
# secrets: inherit
45-
# with:
46-
# dockerfile: ./kernels/python/Dockerfile
47-
# context: ./kernels/python
48-
# images: |
49-
# ghcr.io/${{ github.repository }}/python
50-
# tags: |
51-
# type=ref,event=branch,prefix=${{ matrix.version }}
52-
# type=ref,event=pr,prefix=${{ matrix.version }}
53-
# type=sha,format=long,prefix=${{ matrix.version }}
54-
# type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }},prefix=${{ matrix.version }}
55-
# build_args: |
56-
# PYTHON_VERSION=${{ matrix.version }}
57-
# platforms: "linux/amd64"

.gitignore

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# ignore these everywhere
# (patterns without a slash match at any depth, so the copies that the tasks
# drop into the per-image build directories are never committed)
.pythonrc
.Rprofile
apt-install
Aptfile
environment.txt
git_credential_helper.py
git-wrapper.sh
gpu.Aptfile
gpu.requirements.in
initial-condarc
ipython_config.py
secrets_helper.sh
requirements.in
requirements.R
run.sh

# ...except for these places where we care about changes happening
# (NOTE: this is because the tasks should copy the files down into the build directories)
# Patterns containing a slash are anchored to this file's directory and are
# case-sensitive on case-sensitive filesystems, so they must match the
# on-disk directory names exactly (the R sources live under `R/`).
!scripts/apt-install
!scripts/secrets_helper.sh

!python/base/Aptfile
!python/datascience/Aptfile
!python/noteable/Aptfile
!R/Aptfile

!python/base-gpu/gpu.Aptfile
!python/base-gpu/environment.txt

!python/base-gpu/initial-condarc

!python/base/requirements.in
!python/datascience/requirements.in
!python/noteable/requirements.in

!python/run.sh
!python/base-gpu/run.sh
!R/run.sh

!python/noteable/.pythonrc
!python/noteable/ipython_config.py
!python/noteable/git_credential_helper.py
!python/noteable/git-wrapper.sh

!R/noteable/.Rprofile
!R/noteable/requirements.R

Makefile

Lines changed: 0 additions & 6 deletions
This file was deleted.

R/Aptfile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
build-essential
2+
ca-certificates
3+
cmake
4+
curl
5+
bzip2
6+
gnupg2
7+
wget
8+
g++
9+
git
10+
jq
11+
libudunits2-dev
12+
procps
13+
unixodbc-dev

R/base/4.3.0/Dockerfile

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# syntax = docker/dockerfile:1.2.1
2+
# ---
3+
# Bare minimum R 4.3.x image with IRkernel installed
4+
# - no R packages aside from builtins and IRkernel
5+
# - no git, secrets, SQL, extensions, etc
6+
# ---
7+
ARG NBL_R_VERSION=4.3.0
FROM r-base:${NBL_R_VERSION}
# An ARG declared before FROM is only visible to FROM itself; re-declare it
# (without a value, so it inherits the default above) to make it available to
# the later `micromamba create ... r="${NBL_R_VERSION}"` step and image labels.
ARG NBL_R_VERSION
9+
10+
# User/group setup
11+
USER root
12+
13+
ENV NB_USER="noteable" \
14+
NB_UID=4004 \
15+
NB_GID=4004
16+
17+
RUN groupadd --gid 4004 noteable && \
18+
useradd --uid 4004 \
19+
--shell /bin/false \
20+
--create-home \
21+
--no-log-init \
22+
--gid noteable noteable \
23+
--home-dir /srv/noteable && \
24+
chown --recursive noteable:noteable /srv/noteable && \
25+
mkdir -p /etc/noteable && chown noteable:noteable /etc/noteable
26+
27+
# Install tini to manage passing signals to the child kernel process
# (uses the `ENV key=value` form; the whitespace-separated form is legacy syntax)
ENV TINI_VERSION=v0.19.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
RUN chmod +x /tini
31+
32+
# Use micromamba and set up a virtual environment so we can install packages without root
33+
COPY apt-install /usr/bin/
34+
# hadolint ignore=DL3045
35+
COPY Aptfile .
36+
RUN /usr/bin/apt-install Aptfile
37+
38+
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
39+
RUN wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \
40+
./bin/micromamba shell init -s bash -p ~/micromamba
41+
42+
USER noteable
43+
RUN micromamba create --name noteable-venv \
44+
-c conda-forge \
45+
-y \
46+
r="${NBL_R_VERSION}"
47+
# make subsequent RUN commands use the virtualenv:
48+
SHELL ["micromamba", "run", "-n", "noteable-venv", "/bin/bash", "-c"]
49+
50+
# Fetch IRkernel over HTTPS so the package cannot be tampered with in transit
# hadolint ignore=SC2239
RUN R -e "install.packages('IRkernel', repos='https://cloud.r-project.org')"
52+
53+
COPY secrets_helper.sh /tmp/secrets_helper.sh
54+
COPY run.sh /usr/local/bin
55+
56+
ENV HOME="/srv/noteable" \
57+
XDG_CACHE_HOME="/srv/noteable/.cache/" \
58+
GOOGLE_APPLICATION_CREDENTIALS="/vault/secrets/gcp-credentials"
59+
60+
WORKDIR /etc/noteable/project
61+
EXPOSE 50001-50005
62+
63+
ENTRYPOINT ["/tini", "-g", "--"]
64+
CMD ["run.sh"]
65+
66+
ARG NBL_ARG_BUILD_TIMESTAMP="undefined"
67+
ARG NBL_ARG_REVISION="undefined"
68+
ARG NBL_ARG_BUILD_URL="undefined"
69+
ARG NBL_ARG_VERSION="undefined"
70+
LABEL org.opencontainers.image.created="${NBL_ARG_BUILD_TIMESTAMP}" \
71+
org.opencontainers.image.revision="${NBL_ARG_REVISION}" \
72+
org.opencontainers.image.source="https://github.com/noteable-io/polymorph" \
73+
org.opencontainers.image.title="noteable-R-${NBL_R_VERSION}" \
74+
org.opencontainers.image.url="${NBL_ARG_BUILD_URL}" \
75+
org.opencontainers.image.vendor="Noteable" \
76+
org.opencontainers.image.version="${NBL_ARG_VERSION}"

R/noteable/.Rprofile

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
library(IRdisplay)
2+
library(repr)
3+
library(reticulate)
4+
5+
prepare_dex_content <- function(df) {
6+
#'
7+
#' Create schema and data structure for data frame to be rendered by DEX
8+
#'
9+
10+
# create a schema for a dataframe, which DEX uses to determine column dtypes.
11+
# R data frames don't have this functionality, so we have to use reticulate
12+
# to call into the python pandas library
13+
pandas <- import("pandas")
14+
15+
# If df is a matrix, convert it to a data frame
16+
if (is.matrix(df)) {
17+
# In R, a matrix is a 2D vector, not a data frame. When reticulate converts an R matrix to Python,
18+
# it becomes a numpy array, not a pandas DataFrame. The pandas function we're using requires a DataFrame,
19+
# so we need to convert the matrix to a data frame first.
20+
#
21+
# We use stringsAsFactors = FALSE to prevent R from converting strings to factors. This is a feature of R
22+
# that can be confusing for people used to Python, where there's no direct equivalent of factors.
23+
#
24+
# We use row.names = FALSE to prevent R from using the first column of the data as row names. This is
25+
# because R matrices don't have row names in the same way that data frames do, and we want to keep the
26+
# structure of the data consistent when we convert it to a DataFrame.
27+
df <- as.data.frame(df, stringsAsFactors = FALSE, row.names = FALSE)
28+
}
29+
df_py <- r_to_py(df)
30+
schema <- pandas$io$json$build_table_schema(df_py, index=FALSE)
31+
32+
# vectorized format (list of lists)
33+
#data = as.matrix.data.frame(t(df))
34+
# pandas df.to_dict("records") format
35+
data = as.data.frame.list(df)
36+
37+
list(
38+
schema = schema,
39+
data = data
40+
)
41+
}
42+
43+
prepare_dex_metadata <- function(df) {
  #'
  #' Create metadata for data frame to be rendered by DEX
  #'
  #' @param df A data.frame or matrix (anything for which `dim()` returns
  #'   a length-2 vector).
  #' @return A list with `default_index_used` (always TRUE) and
  #'   `dataframe_info`, which carries the original row and column counts.
  #'

  # R vectors are 1-indexed: dim(df)[1] is the row count and dim(df)[2] the
  # column count. Indexing with [0] yields an empty integer vector.
  dims <- dim(df)

  list(
    default_index_used = TRUE,
    dataframe_info = list(
      orig_num_rows = dims[1],
      orig_num_cols = dims[2]
    )
  )
}
55+
56+
repr_dex <- function(obj, ...) {
  #'
  #' repr formatter that produces the DEX payload for tabular objects
  #'
  #' @param obj Object being displayed.
  #' @param ... Ignored; accepted so the function can be registered as a
  #'   repr_* method.
  #' @return The result of prepare_dex_content(obj) when obj is a data.frame
  #'   or matrix; NULL otherwise.
  #'
  if (is(obj, "data.frame") || is(obj, "matrix")) {
    # we could use publish_mimebundle() to provide the data and metadata
    # (metadata would come from prepare_dex_metadata(obj)),
    # but that doesn't return anything, which triggers repr_html/repr_markdown, etc
    #publish_mimebundle(bundle_data, metadata=bundle_metadata)
    return(prepare_dex_content(obj))
  }

  # if it's not a matrix or data.frame, return NULL to let other repr_* functions handle it.
  return(NULL)
}
71+
72+
enable_dex_formatter <- function() {
  #'
  #' Turn on DEX rendering for matrices and data frames
  #'
  #' Safe to call more than once: the formatter and mimetype are set by
  #' name/union rather than appended, so repeated calls do not create
  #' duplicate entries.
  #'
  dex_mimetype <- "application/vnd.dataresource+json"

  # Add custom display formatter to newly added mimetype
  formatters <- repr::mime2repr
  formatters[[dex_mimetype]] <- repr_dex
  IRkernel:::replace_in_package('repr', 'mime2repr', formatters)

  # Add dataresource mimetype to list of recognized mimetypes
  mimetypes <- union(getOption('jupyter.display_mimetypes'), dex_mimetype)
  options(jupyter.display_mimetypes = mimetypes)

  # Register custom formatter for matrix and data.frame
  registerS3method("repr_html", "matrix", repr_dex)
  registerS3method("repr_html", "data.frame", repr_dex)
}
84+
85+
disable_dex_formatter <- function() {
  #'
  #' Turn off DEX rendering and restore the default repr_html methods
  #'
  dex_mimetype <- "application/vnd.dataresource+json"

  # Remove custom display formatter. Once the formatter has been enabled,
  # repr::mime2repr may already contain the DEX entry, so writing it back
  # unchanged would leave the formatter installed; filter the entry out
  # (all occurrences, in case enable was called more than once).
  formatters <- repr::mime2repr
  formatters <- formatters[names(formatters) != dex_mimetype]
  IRkernel:::replace_in_package('repr', 'mime2repr', formatters)

  # Remove dataresource mimetype from list of recognized mimetypes
  mimetypes <- setdiff(getOption('jupyter.display_mimetypes'), dex_mimetype)
  options(jupyter.display_mimetypes = mimetypes)

  # Reset the formatter for matrix and data.frame to the default
  registerS3method("repr_html", "matrix", repr:::repr_html.matrix)
  registerS3method("repr_html", "data.frame", repr:::repr_html.data.frame)
}
97+
98+
# enable by default
99+
enable_dex_formatter()

R/noteable/4.3.0/Dockerfile

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# syntax = docker/dockerfile:1.2.1
2+
# Noteable build: adds packages to enable Noteable-specific functionality:
3+
# - DEX support (via .Rprofile)
4+
ARG BASE_IMAGE
5+
# hadolint ignore=DL3006
6+
FROM ${BASE_IMAGE} as base
7+
8+
USER noteable
9+
10+
# Install python to use with Reticulate
11+
RUN micromamba install python=3.9 -y -c conda-forge
12+
13+
# R package dependencies and py_install
14+
COPY requirements.R /tmp/requirements.R
15+
RUN R -e "source('/tmp/requirements.R')"
16+
17+
# similarly, copy any R commands that need to run on startup
18+
COPY .Rprofile /srv/noteable/.Rprofile

R/noteable/requirements.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Install reticulate from CRAN over HTTPS so the download is integrity-protected
install.packages('reticulate', repos='https://cloud.r-project.org')
library(reticulate)
# Python packages to be used in R via reticulate
# ref: https://rstudio.github.io/reticulate/articles/python_packages.html
py_install('pandas==1.5.3', pip=TRUE)

R/run.sh

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env bash
# Container entrypoint script: copies the kernel connection file to /tmp,
# sources the secrets helper, then starts the IRkernel inside the
# noteable-venv micromamba environment.
set -o pipefail
set -o nounset
set -o errexit

echo "Local time: $(date)"

set -x

connection_file=/tmp/connection_file.json
# Quote expansions so the path is always passed as a single word (SC2086)
cp /etc/noteable/connections/connection_file.json "${connection_file}"

# Inject Secrets into environment (see script docstring for more info)
# set +x to avoid echoing the Secrets in plaintext to logs
set +x
echo "Injecting Secrets into environment, echoing is turned off"
# shellcheck disable=SC1091
source /tmp/secrets_helper.sh
echo "Done injecting Secrets, turning echoing back on"
set -x

echo "Starting R kernel"
micromamba run -n noteable-venv R --slave -e "IRkernel::main()" --args "${connection_file}"

Taskfile.R.yaml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
version: '3'

# https://hub.docker.com/_/r-base/tags
vars:
  NBL_R_VERSION: 4.3.0
  IDENTIFIER: base

# NOTE: When using `deps: []`, variables are inherited from the current task, but when calling them
# directly in `cmds: []`, the variables have to be passed in explicitly.

tasks:
  core:build:
    desc: Build the R 4.x image
    cmds:
      # Literal block scalar: newlines are kept, so each trailing backslash is a
      # real shell line continuation regardless of how the lines are indented.
      - |-
        docker build R/{{.IDENTIFIER}}/{{.NBL_R_VERSION}} \
          --build-arg "NBL_R_VERSION={{.NBL_R_VERSION}}" \
          --build-arg "BASE_IMAGE={{.BASE_IMAGE}}" \
          --tag "local/kernel-r-{{.NBL_R_VERSION}}-{{.IDENTIFIER}}:dev"

  base:copy-files:
    desc: Copy files from the R directory to the build directories
    cmds:
      - task copy-root-files LANGUAGE=R IDENTIFIER={{.IDENTIFIER}} NBL_LANGUAGE_VERSION={{.NBL_R_VERSION}}
      - task copy-language-files LANGUAGE=R IDENTIFIER={{.IDENTIFIER}} NBL_LANGUAGE_VERSION={{.NBL_R_VERSION}}

  base:build:
    desc: Build the R 4.x base image after copying required files
    cmds:
      # base:copy-files reads NBL_R_VERSION (not NBL_LANGUAGE_VERSION), so pass the
      # version under that name; otherwise an overridden version would be used for
      # the build but not for the file copy.
      - task r:base:copy-files IDENTIFIER=base NBL_R_VERSION={{.NBL_R_VERSION}}
      - task r:core:build IDENTIFIER=base NBL_R_VERSION={{.NBL_R_VERSION}}

  noteable:build:
    desc: Build the R 4.3.x image with data frame -> DEX support
    cmds:
      - cp R/noteable/.Rprofile R/noteable/{{.NBL_R_VERSION}}/.Rprofile
      - cp R/noteable/requirements.R R/noteable/{{.NBL_R_VERSION}}/requirements.R
      - task r:core:build IDENTIFIER=noteable NBL_R_VERSION={{.NBL_R_VERSION}} BASE_IMAGE=local/kernel-r-{{.NBL_R_VERSION}}-base:dev

0 commit comments

Comments
 (0)