Skip to content

Commit 018dc6c

Browse files
authored
Merge pull request #203 from NERSC/improve-docker-bake
Improve local operator dev workflow with docker bake
2 parents e2c044f + 383ef51 commit 018dc6c

File tree

47 files changed

+2751
-1104
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+2751
-1104
lines changed

backend/core/interactem/core/models/messages.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class MessageSubject(str, Enum):
3131

3232
class MessageHeader(BaseModel):
3333
subject: MessageSubject
34-
meta: dict[str, Any] = {}
34+
meta: bytes | dict[str, Any] = b"{}"
3535
tracking: (
3636
list[
3737
OperatorTrackingMetadata

backend/operators/interactem/operators/messengers/zmq.py

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -98,35 +98,42 @@ async def recv(self) -> BytesMessage | None:
9898
all_messages: list[BytesMessage] = []
9999

100100
for id, msg_parts in id_msgs:
101+
raw_meta = None
102+
101103
if len(msg_parts) == 2:
102104
_header, _data = msg_parts
103105
elif len(msg_parts) == 3:
104-
_, _header, _data = msg_parts
106+
_header, raw_meta, _data = msg_parts
105107
else:
106-
logger.error(
107-
"Received an unexpected number of message parts: %s", len(msg_parts)
108-
)
109108
return None
110109

110+
# Decode header JSON
111111
if isinstance(_header, zmq.Message):
112112
header = MessageHeader.model_validate_json(_header.bytes)
113113
elif isinstance(_header, bytes):
114114
header = MessageHeader.model_validate_json(_header)
115115
else:
116-
logger.error("Received an unexpected message type: %s", type(_header))
117116
continue
118117

118+
# If meta was sent separately as bytes, restore it
119+
if raw_meta is not None:
120+
if isinstance(raw_meta, zmq.Message):
121+
header.meta = raw_meta.bytes
122+
else:
123+
header.meta = raw_meta
124+
119125
if header.subject != MessageSubject.BYTES:
120126
logger.error(
121127
"Received an unexpected message subject: %s", header.subject
122128
)
123129
continue
124130

125-
msg = (
126-
BytesMessage(header=header, data=_data.bytes)
127-
if isinstance(_data, zmq.Message)
128-
else BytesMessage(header=header, data=_data)
129-
)
131+
if isinstance(_data, zmq.Message):
132+
msg = BytesMessage(header=header, data=_data.bytes)
133+
else:
134+
msg = BytesMessage(header=header, data=_data)
135+
136+
# Tracking update
130137
if header.tracking is not None:
131138
header.tracking.append(
132139
InputPortTrackingMetadata(
@@ -157,21 +164,32 @@ async def send(self, message: BytesMessage):
157164
msg_futures = []
158165
data = message.data
159166
should_copy_header = len(self.output_ports) > 1
160-
time_before_send = datetime.now()
161167
for socket in self.output_sockets.values():
162-
# need to copy to send same message to multple sockets
163-
# otherwise we will continue to append to the same header
164168
if should_copy_header:
165-
header = message.header.model_copy(deep=True)
169+
header = message.header.model_copy()
166170
else:
167171
header = message.header
172+
168173
if header.tracking is not None:
174+
time_before_send = datetime.now()
169175
meta = OutputPortTrackingMetadata(
170176
id=socket.info.port_id, time_before_send=time_before_send
171177
)
172178
header.tracking.append(meta)
173-
header = header.model_dump_json().encode()
174-
things_to_send = [header, data]
179+
180+
# --- Separate meta if it's bytes ---
181+
raw_meta = None
182+
if isinstance(header.meta, bytes):
183+
raw_meta = header.meta
184+
header_json = header.model_dump_json(exclude={"meta"}).encode()
185+
else:
186+
header_json = header.model_dump_json().encode()
187+
188+
# --- Build multipart message ---
189+
if raw_meta is not None:
190+
things_to_send = [header_json, raw_meta, data]
191+
else:
192+
things_to_send = [header_json, data]
175193

176194
msg_futures.append(self._send_and_update_metrics(socket, things_to_send))
177195
# TODO: look into creating tasks

operators/.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
PODMAN_SERVICE_URI=unix:///<your-socket>
2+
BAKE_FILE=./docker-bake.hcl
3+
VERBOSE=true
4+
REGISTRY=host.containers.internal:5001/ghcr.io/nersc/interactem

operators/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.env

operators/README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Building operators
2+
3+
## Building locally
4+
5+
### MacOS
6+
7+
1. Get `docker desktop` and `podman desktop`.
8+
1. Set up docker local docker registry by running:
9+
10+
```sh
11+
docker run -d -p 5001:5000 --restart always --name docker-registry registry:3
12+
```
13+
14+
1. Use [bake.sh](./bake.sh) to build all containers with docker. This includes base image, operator, and distiller-streaming. You should do the following
15+
16+
```sh
17+
./bake --push-local --build-base
18+
```
19+
20+
This will push everything to the local registry, instead of pushing up to GitHub packages. You can also omit `--build-base` to avoid building base images for faster iteration.
21+
22+
1. Set your `.env` file in the operator directory to have the correct podman socket (see [`.env.example`](./.env.example))
23+
1. Use poetry environment from root directory [pyproject.toml](../pyproject.toml) and run the following:
24+
25+
```sh
26+
poetry run python pull_images_from_bake.py
27+
```
28+
29+
This will pull local registry images into podman and tag them appropriately with [`pull_images_from_bake.py`](./pull_images_from_bake.py). This also runs at the end of [`bake.sh`](./bake.sh) if `--pull-local` is given.
30+
31+
The [build_all.sh](./build_all.sh) script was used before, but it is cumbersome so I am not updating it.

operators/bake.sh

Lines changed: 109 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,131 @@
11
#!/bin/bash
2+
set -e
3+
24
SCRIPT_DIR=$(dirname "$0")
35
REPO_ROOT_DIR=$(git rev-parse --show-toplevel)
46
TAG=$(git rev-parse --short=6 HEAD)
7+
OPERATORS_DIR="$REPO_ROOT_DIR/operators"
8+
BAKE_FILE="$OPERATORS_DIR/docker-bake.hcl"
9+
OPERATOR_JSON="operator.json"
10+
VENV_DIR="$OPERATORS_DIR/distiller-streaming/.venv"
511

6-
cd $REPO_ROOT_DIR
12+
BUILD_BASE=false
13+
PUSH_LOCAL=false
14+
PUSH_REMOTE=false
15+
PULL_LOCAL=false
16+
TARGET=""
717

8-
# Build all images using Docker Bake
9-
docker buildx bake base --file $REPO_ROOT_DIR/operators/docker-bake.hcl
10-
docker buildx bake operator --file $REPO_ROOT_DIR/operators/docker-bake.hcl
11-
docker buildx bake distiller-streaming --file $REPO_ROOT_DIR/operators/docker-bake.hcl
12-
docker buildx bake operators --file $REPO_ROOT_DIR/operators/docker-bake.hcl --provenance=false
18+
# Parse args
19+
while [[ $# -gt 0 ]]; do
20+
case $1 in
21+
--build-base)
22+
BUILD_BASE=true
23+
shift
24+
;;
25+
--push-local)
26+
PUSH_LOCAL=true
27+
VARS="$OPERATORS_DIR/vars-local.hcl"
28+
shift
29+
;;
30+
--pull-local)
31+
PULL_LOCAL=true
32+
shift
33+
;;
34+
--push-remote)
35+
PUSH_REMOTE=true
36+
VARS="$OPERATORS_DIR/vars-prod.hcl"
37+
shift
38+
;;
39+
--target)
40+
TARGET="$2"
41+
shift 2
42+
;;
43+
*)
44+
shift
45+
;;
46+
esac
47+
done
1348

14-
if [ $? -ne 0 ]; then
15-
echo "Failed to build images"
49+
if [ -z "$VARS" ]; then
50+
echo "Error: Must specify either --push-local or --push-remote"
1651
exit 1
1752
fi
1853

19-
generate_label_args() {
20-
local label_args=""
21-
# Process each operator directory
22-
for dir in "$REPO_ROOT_DIR/operators"/*; do
23-
if [ -d "$dir" ] && [ -f "$dir/operator.json" ] && [ -f "$dir/Containerfile" ]; then
24-
op_name=$(basename "$dir")
54+
BASE_VARS="$OPERATORS_DIR/vars-base.hcl"
55+
56+
cd "$REPO_ROOT_DIR"
57+
58+
59+
if $BUILD_BASE; then
60+
echo "=== Building base images ==="
61+
docker buildx bake base --file "$BAKE_FILE" \
62+
--file "$BASE_VARS"
63+
64+
docker buildx bake operator --file "$BAKE_FILE" \
65+
--file "$BASE_VARS"
2566

26-
# Get json
27-
operator_json=$(jq -c . "$dir/operator.json" 2>/dev/null)
28-
if [ $? -ne 0 ] || [ -z "$operator_json" ]; then
29-
continue
67+
docker buildx bake distiller-streaming --file "$BAKE_FILE" \
68+
--file "$BASE_VARS"
69+
fi
70+
71+
#
72+
# === Build operators ===
73+
#
74+
build_operators() {
75+
local cmd=(docker buildx bake)
76+
local has_labels=false
77+
78+
if [ -n "$TARGET" ]; then
79+
cmd+=("$TARGET")
80+
cmd+=(--provenance=false)
81+
else
82+
cmd+=(operators)
83+
fi
84+
85+
cmd+=(--file "$BAKE_FILE")
86+
cmd+=(--file "$VARS")
87+
88+
# Add labels from operator.json files
89+
for dir in "$OPERATORS_DIR"/*; do
90+
if [ -d "$dir" ] && [ -f "$dir/$OPERATOR_JSON" ] && [ -f "$dir/Containerfile" ]; then
91+
op_name=$(basename "$dir")
92+
operator_json_file=$(jq -c . "$dir/$OPERATOR_JSON" 2>/dev/null)
93+
if [ $? -eq 0 ] && [ -n "$operator_json_file" ]; then
94+
cmd+=(--set "${op_name}.labels.interactem.operator.spec=${operator_json_file}")
95+
has_labels=true
3096
fi
31-
# Add set argument using file instead of inline string
32-
label_args="${label_args} --set ${op_name}.labels.interactem.operator.spec='${operator_json}'"
3397
fi
3498
done
3599

36-
echo "$label_args"
100+
# Check if we found any labels
101+
if [ "$has_labels" = false ]; then
102+
echo "No operator.json files found or all are empty."
103+
return 1
104+
fi
105+
106+
cmd+=(--push)
107+
108+
# Execute the command
109+
"${cmd[@]}"
37110
}
38111

39-
LABEL_ARGS=$(generate_label_args)
40-
if [ -z "$LABEL_ARGS" ]; then
41-
echo "No operator.json files found or all are empty."
42-
exit 1
43-
fi
44-
45-
# Create a temporary script because escaping json is hard
46-
TMP_SCRIPT="/tmp/docker_buildx_script.sh"
47-
cat > "$TMP_SCRIPT" << EOL
48-
#!/bin/bash
49-
docker buildx bake operators --file $REPO_ROOT_DIR/operators/docker-bake.hcl --push --provenance=false $LABEL_ARGS
50-
EOL
51-
52-
chmod +x "$TMP_SCRIPT"
53-
"$TMP_SCRIPT"
112+
echo "=== Building operators ==="
113+
build_operators
54114
build_status=$?
55-
rm -f "$TMP_SCRIPT"
56115

57116
if [ $build_status -ne 0 ]; then
58117
echo "Failed to build/push images"
59118
exit 1
60-
fi
119+
fi
120+
121+
#
122+
# === Pull locally built images to podman ===
123+
#
124+
if $PUSH_LOCAL && $PULL_LOCAL; then
125+
echo "=== Pulling images back from local registry ==="
126+
cd $OPERATORS_DIR
127+
source $VENV_DIR/bin/activate
128+
poetry run python pull_images_from_bake.py
129+
fi
130+
131+
echo "=== Done ==="

operators/beam-compensation/Containerfile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
FROM ghcr.io/nersc/interactem/operator
1+
FROM ghcr.io/nersc/interactem/distiller-streaming
22

3-
RUN pip install scipy ncempy stempy
3+
# had to add cffi because ncempy messes with that package when adding, and nkeys acts up...
4+
RUN poetry add ncempy cffi
45

56
COPY ./run.py /app/run.py
67

0 commit comments

Comments
 (0)