Skip to content

Commit 270355d

Browse files
authored
Initialize project with base kernel (#2)
* Add initial builds for python kernel * Add python version to Dockerfile * Fix pull requests * Fix workflow reference * Fix workflow reference again * Prefix with ./ * Fix dockerfile / context args * Add Dockerhub secrets * Add actions:write * Set python version as environment variable * Use build args to set python version * Disable ARM build * Use common files layout from legacy project * Fix makefile script * Add kernels/ prefix to copy paths * Fix file paths * Fix ipython path * Move environment.txt to kernel * Reenable arm builds * Remove julia fix-permissions * Disable arm builds again * Remove default build labels * Remove unused Make scripts * Remove duplicate linting block * Add missing newline * Add comment regarding docker image versions
1 parent f27c3dd commit 270355d

File tree

12 files changed

+510
-0
lines changed

12 files changed

+510
-0
lines changed

.github/workflows/build.yml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
name: Build kernel images
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
branches:
9+
- main
10+
11+
jobs:
12+
base-linting:
  name: base-linting
  runs-on: ubuntu-22.04

  steps:
    - name: Checkout Code
      uses: actions/checkout@v3

    - name: Lint Dockerfile, Shell scripts, YAML
      uses: github/super-linter@v4
      env:
        # Must name the branch this workflow is triggered for (`main`);
        # super-linter uses it as the base when working out changed files.
        DEFAULT_BRANCH: main
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

        # Linters to enable
        VALIDATE_BASH: true
        VALIDATE_BASH_EXEC: true
        VALIDATE_DOCKERFILE_HADOLINT: true
        VALIDATE_YAML: true
31+
32+
build_python_kernel:
33+
permissions:
34+
id-token: write
35+
contents: read
36+
packages: write
37+
actions: write
38+
uses: ./.github/workflows/reusable-docker-build.yml
39+
strategy:
40+
matrix:
41+
# Must be a supported version by jupyter/datascience-notebook
42+
# https://hub.docker.com/r/jupyter/datascience-notebook/tags?page=1&name=python-
43+
version: [ "3.9.13", "3.8.13" ]
44+
secrets: inherit
45+
with:
46+
dockerfile: ./kernels/python/Dockerfile
47+
context: ./kernels/python
48+
images: |
49+
ghcr.io/${{ github.repository }}/python
50+
tags: |
51+
type=ref,event=branch,prefix=${{ matrix.version }}
52+
type=ref,event=pr,prefix=${{ matrix.version }}
53+
type=sha,format=long,prefix=${{ matrix.version }}
54+
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }},prefix=${{ matrix.version }}
55+
build_args: |
56+
PYTHON_VERSION=${{ matrix.version }}
57+
platforms: "linux/amd64"
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
name: docker
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
dockerfile:
7+
description: "Path to the Dockerfile to build"
8+
type: string
9+
default: Dockerfile
10+
context:
11+
description: "The context for Docker build"
12+
type: string
13+
default: "."
14+
platforms:
15+
description: "Comma separate list of platforms to build on"
16+
type: string
17+
required: false
18+
default: "linux/amd64,linux/arm64"
19+
images:
20+
description: "The image names that we want to build"
21+
type: string
22+
required: true
23+
tags:
24+
description: "The various tags to be attached to the built image"
25+
type: string
26+
required: false
27+
default: ""
28+
labels:
29+
description: "The various labels to attach to the built image"
30+
type: string
31+
required: false
32+
default: |
33+
org.opencontainers.image.url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
34+
org.opencontainers.image.vendor=Noteable
35+
org.opencontainers.image.version=${{ github.ref }}
36+
target:
37+
description: "Sets the target stage to build"
38+
type: string
39+
required: false
40+
build_args:
41+
description: "Additional build args to pass to the Docker build"
42+
type: string
43+
required: false
44+
default: ""
45+
secrets:
46+
# We login to Dockerhub to prevent rate limiting issues when pulling images
47+
# https://docs.docker.com/docker-hub/download-rate-limit/
48+
DOCKERHUB_USER:
49+
required: true
50+
DOCKERHUB_PASSWORD:
51+
required: true
52+
53+
jobs:
54+
build:
  permissions:
    id-token: write
    contents: read
    packages: write

  # Only build on pushes and on pull requests that are still open.
  if: |
    github.event_name == 'push' ||
    (github.event_name == 'pull_request' && github.event.pull_request.state == 'open')
  runs-on: ubuntu-22.04
  steps:
    - name: Checkout the code
      uses: actions/checkout@v3

    # Copies the shared top-level files into the kernel build context.
    - name: Copy common files
      run: make copy-common-files

    - name: Log in to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USER }}
        password: ${{ secrets.DOCKERHUB_PASSWORD }}

    - name: Log in to the Container registry
      uses: docker/login-action@v2
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2
      with:
        version: v0.10.1

    # Note: The outputs in github action will show duplicate labels being generated for the meta outputs.
    # When the Docker engine builds, it will only take the later values, and our custom labels get added
    # at the end. https://github.com/docker/metadata-action/issues/125
    - name: Docker metadata for labels and tags
      id: meta
      uses: docker/metadata-action@v4
      with:
        images: ${{ inputs.images }}
        tags: ${{ inputs.tags }}
        labels: ${{ inputs.labels }}

    - name: Build and push
      uses: docker/build-push-action@v3
      with:
        platforms: ${{ inputs.platforms }}
        context: ${{ inputs.context }}
        # Forward the `dockerfile` input; without this the input is silently
        # ignored and the action falls back to `<context>/Dockerfile`.
        file: ${{ inputs.dockerfile }}
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        target: ${{ inputs.target }}
        cache-from: type=gha
        cache-to: type=gha,mode=max
        build-args: ${{ inputs.build_args }}
111+
112+
clear_cache:
  permissions:
    contents: read
    actions: write
  # If the PR is closed (or merged), we want to clear the cache
  # NOTE(review): this only fires if the calling workflow's `pull_request`
  # trigger includes the `closed` activity type - confirm in the caller.
  if: ${{ github.event_name == 'pull_request' && github.event.pull_request.state == 'closed' }}
  runs-on: ubuntu-latest
  steps:
    - name: Check out code
      uses: actions/checkout@v3

    - name: Cleanup
      run: |
        gh extension install actions/gh-actions-cache

        REPO="${{ github.repository }}"
        # Caches created by PR builds are scoped to the PR merge ref. Build it
        # explicitly from the PR number rather than relying on github.ref.
        BRANCH="refs/pull/${{ github.event.pull_request.number }}/merge"

        echo "Fetching list of cache key"
        cacheKeysForPR=$(gh actions-cache list -R "$REPO" -B "$BRANCH" | cut -f 1 )

        ## Setting this to not fail the workflow while deleting cache keys.
        set +e
        echo "Deleting caches..."
        # Intentionally unquoted: one cache key per whitespace-separated word.
        for cacheKey in $cacheKeysForPR
        do
            gh actions-cache delete "$cacheKey" -R "$REPO" -B "$BRANCH" --confirm
        done
        echo "Done"
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Copy the shared top-level files into the python kernel's Docker build context.
# Declared phony so a stray file named `copy-common-files` never skips the recipe.
.PHONY: copy-common-files
copy-common-files:
	cp requirements.txt kernels/python/
	cp ipython_config.py kernels/python/
	cp secrets_helper.py kernels/python/
	cp git_credential_helper.py kernels/python/
	cp git-wrapper.sh kernels/python/

git-wrapper.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/usr/bin/env bash
# This script wraps git to only allow certain commands to be run.
# We mainly want to prevent users from getting into unknown states by checking out other branches, etc.

# Allowed command list
allowed_commands=( "commit" "pull" "push" "status" "diff" "add" "fetch" "log" "version" )

# Check if the command is allowed.
# The first argument is compared against each entry exactly. A substring test
# against the space-joined list would also accept multi-word arguments such
# as "status diff".
is_allowed=false
for allowed in "${allowed_commands[@]}"; do
    if [[ "${1:-}" == "${allowed}" ]]; then
        is_allowed=true
        break
    fi
done

if [[ "${is_allowed}" != true ]]; then
    echo "That git command is not allowed, contact [email protected] if you think this is a mistake."
    exit 1
fi

# Otherwise pass through to git at /usr/bin/git
exec /usr/bin/git "$@"

git_credential_helper.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/usr/bin/env python3
2+
"""
3+
This script is used as a Git credential helper https://git-scm.com/docs/git-credential.
4+
We iterate through all the git credential secrets on the file system and return the first one that matches the requested URL.
5+
If no match is found, we return an empty response.
6+
An empty response will cause Git to use the next credential helper in the list, or prompt the user for credentials.
7+
To test this script:
8+
$ cat > /tmp/demo.git-cred <<EOF
9+
{
10+
"meta": {
11+
"type": "USERNAME_PASSWORD",
12+
"host": "github.com",
13+
"protocol": "https",
14+
"path": "foo/bar"
15+
},
16+
"data": {
17+
"username": "demo",
18+
"password": "demo_password"
19+
}
20+
}
21+
EOF
22+
$ export NTBL_SECRETS_DIR=/tmp
23+
$ echo -e "host=github.com\nprotocol=https\npath=foo/bar" | ./git_credential_helper.py
24+
username=demo
25+
password=demo_password
26+
"""
27+
28+
import json
29+
from pathlib import Path
30+
import sys
31+
import os
32+
from typing import Optional
33+
34+
35+
def parse_input(input_: str) -> dict:
    """Parse the ``key=value`` lines Git sends to a credential helper.

    Args:
        input_: Raw text read from stdin, one ``key=value`` attribute per line.

    Returns:
        A dict mapping each attribute name to its value. Values may themselves
        contain ``=``; only the first ``=`` on a line separates key from value.
    """
    parsed = {}
    for line in input_.splitlines():
        if not line:
            # The credential protocol ends the attribute list with a blank
            # line (or EOF); nothing after it belongs to this request.
            break
        key, separator, value = line.partition("=")
        if not separator:
            # Not a key=value line; skip it instead of crashing the helper.
            continue
        parsed[key] = value
    return parsed
38+
39+
40+
def format_output(data: dict) -> str:
    """Render a mapping as newline-separated ``key=value`` lines for Git.

    Entries are emitted in the mapping's iteration order, with no trailing
    newline.
    """
    lines = [f"{name}={val}" for name, val in data.items()]
    return "\n".join(lines)
43+
44+
45+
def find_secret(input_data: dict) -> Optional[dict]:
    """Find the first stored git credential that matches the request.

    Looks at every ``*.git-cred`` file in the directory named by the
    ``NTBL_SECRETS_DIR`` environment variable (default ``/vault/secrets``).
    Each file is JSON with a ``meta`` object and a ``data`` object. A file
    matches when its ``meta`` values for ``host``, ``protocol`` and ``path``
    all equal the corresponding values in ``input_data``.

    Args:
        input_data: Attributes parsed from Git's credential request.

    Returns:
        The ``data`` object of the first matching secret, or ``None`` when the
        directory does not exist or no secret matches.
    """
    secrets_dir = Path(os.environ.get("NTBL_SECRETS_DIR", "/vault/secrets"))
    if not secrets_dir.exists():
        return None

    keys_to_match = ("host", "protocol", "path")
    # Sorted so that "first match" is deterministic; glob order depends on the
    # filesystem.
    for secret_path in sorted(secrets_dir.glob("*.git-cred")):
        try:
            secret_data = json.loads(secret_path.read_text())
            meta = secret_data["meta"]
            if all(meta[key] == input_data.get(key) for key in keys_to_match):
                return secret_data["data"]
        except (OSError, ValueError, KeyError, TypeError):
            # An unreadable, malformed or incomplete secret must not stop the
            # remaining (valid) secrets from being considered.
            continue

    return None
59+
60+
61+
def main(stdin=sys.stdin, stdout=sys.stdout):
    """Read a credential request from ``stdin`` and answer it on ``stdout``.

    Nothing is written when no stored secret matches the request.
    """
    request = parse_input(stdin.read())
    credentials = find_secret(request)
    if credentials is None:
        return
    print(format_output(credentials), file=stdout)
66+
67+
68+
if __name__ == "__main__":
69+
main()

ipython_config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
c.InteractiveShellApp.extensions = [
2+
"noteable_magics",
3+
]
4+
5+
c.SqlMagic.feedback = False
6+
c.SqlMagic.autopandas = True
7+
c.NTBLMagic.project_dir = "/etc/noteable/project"
8+
c.NoteableDataLoaderMagic.return_head = False
9+
c.IPythonKernel._execute_sleep = 0.15
10+
# 10 minutes to support large files
11+
c.NTBLMagic.planar_ally_default_timeout_seconds = 600

kernels/python/.pythonrc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import pandas as pd
2+
3+
import dx
4+
5+
dx.set_option("DISPLAY_MAX_ROWS", 50_000)
6+
dx.set_option("DISPLAY_MAX_COLUMNS", 100)
7+
dx.set_option("ENABLE_DATALINK", True)
8+
dx.set_option("ENABLE_ASSIGNMENT", False)

0 commit comments

Comments
 (0)