Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 25 additions & 29 deletions k8s/cluster.yaml
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
apiVersion: v1
kind: Service
metadata:
name: service-dfray-proxy
name: service-dd-proxy
labels:
app: dfray-proxy
app: dd-proxy
spec:
type: ClusterIP
ports:
- name: dfray-proxy
- name: dd-proxy
port: 20200
targetPort: 20200
selector:
app: dfray-proxy
app: dd-proxy
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: deployment-dfray-proxy
name: deployment-dd-proxy
labels:
app: dfray-proxy
app: dd-proxy
spec:
replicas: 2
selector:
matchLabels:
app: dfray-proxy
app: dd-proxy
template:
metadata:
labels:
app: dfray-proxy
service: dfray-proxy
app: dd-proxy
service: dd-proxy
team: mint
admission.datadoghq.com/enabled: "true"
annotations:
Expand All @@ -52,16 +52,14 @@ spec:
- key: "node-role.kubernetes.io/nodeless"
operator: Exists
containers:
- name: dfray-proxy
image: registry.ddbuild.io/dfray-amd64:2025-06-17-b
- name: dd-proxy
image: registry.ddbuild.io/dd-amd64:2025-06-17-b
env:
- name: DATAFUSION_RAY_LOG_LEVEL
value: trace
- name: DFRAY_WORKER_DEPLOYMENT
value: deployment-dfray-worker
- name: DFRAY_WORKER_DEPLOYMENT_PORT
- name: dd_WORKER_DEPLOYMENT
value: deployment-dd-worker
- name: dd_WORKER_DEPLOYMENT_PORT
value: "20201"
- name: DFRAY_WORKER_DEPLOYMENT_NAMESPACE
- name: dd_WORKER_DEPLOYMENT_NAMESPACE
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So you mean we should keep dd lowercase to stay consistent?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, let me fix that.

valueFrom:
fieldRef:
fieldPath: metadata.namespace
Expand All @@ -76,7 +74,7 @@ spec:
command:
- sh
- "-c"
- "/dfray --mode proxy --port 20200"
- "/dd --mode proxy --port 20200"
ports:
- containerPort: 20200
resources:
Expand All @@ -90,19 +88,19 @@ spec:
apiVersion: apps/v1
kind: Deployment
metadata:
name: deployment-dfray-worker
name: deployment-dd-worker
labels:
app: dfray-worker
app: dd-worker
spec:
replicas: 2
selector:
matchLabels:
app: dfray-worker
app: dd-worker
template:
metadata:
labels:
app: dfray-worker
service: service-dfray-worker
app: dd-worker
service: service-dd-worker
team: mint
admission.datadoghq.com/enabled: "true"
annotations:
Expand All @@ -126,13 +124,11 @@ spec:
- key: "node-role.kubernetes.io/nodeless"
operator: Exists
containers:
- name: dfray-worker
image: registry.ddbuild.io/dfray-amd64:2025-06-17-b
- name: dd-worker
image: registry.ddbuild.io/dd-amd64:2025-06-17-b
env:
- name: DATAFUSION_RAY_LOG_LEVEL
value: trace
- name: RUST_LOG
value: datafusion_bindings=info,cs3_query=info
value: distributed_datafusion=trace,datafusion_bindings=info,cs3_query=info
- name: HDQ_URL
value: http://hdq-creed.data-eng.all-clusters.local-dc.fabric.dog:6420
- name: MEM_THRESHOLD
Expand All @@ -142,7 +138,7 @@ spec:
command:
- sh
- "-c"
- "/dfray --mode worker --port 20201"
- "/dd --mode worker --port 20201"
ports:
- containerPort: 20201
resources:
Expand Down
56 changes: 26 additions & 30 deletions k8s/dev_cluster.yaml
Original file line number Diff line number Diff line change
@@ -1,51 +1,49 @@
apiVersion: v1
kind: Service
metadata:
name: service-dfray-proxy
name: service-dd-proxy
labels:
app: dfray-proxy
app: dd-proxy
spec:
type: ClusterIP
ports:
- name: dfray-proxy
- name: dd-proxy
port: 20200
targetPort: 20200
selector:
app: dfray-proxy
app: dd-proxy
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: deployment-dfray-proxy
name: deployment-dd-proxy
labels:
app: dfray-proxy
app: dd-proxy
spec:
replicas: 1
selector:
matchLabels:
app: dfray-proxy
app: dd-proxy
template:
metadata:
labels:
app: dfray-proxy
service: dfray-proxy
app: dd-proxy
service: dd-proxy
spec:
containers:
- name: dfray-proxy
image: registry.ddbuild.io/dfray:2025-06-16-e
- name: dd-proxy
image: registry.ddbuild.io/dd:2025-06-16-e
env:
- name: DATAFUSION_RAY_LOG_LEVEL
value: debug
- name: DFRAY_WORKER_DEPLOYMENT
value: deployment-dfray-worker
- name: DFRAY_WORKER_DEPLOYMENT_PORT
- name: dd_WORKER_DEPLOYMENT
value: deployment-dd-worker
- name: dd_WORKER_DEPLOYMENT_PORT
value: "20201"
- name: DFRAY_WORKER_DEPLOYMENT_NAMESPACE
- name: dd_WORKER_DEPLOYMENT_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: RUST_LOG
value: datafusion_bindings=info,cs3_query=info
value: datafusion_bindings=info,cs3_query=info,distributed_datafusion=debug
- name: HDQ_URL
value: http://hdq-creed.data-eng.all-clusters.local-dc.fabric.dog:6420
- name: MEM_THRESHOLD
Expand All @@ -55,35 +53,33 @@ spec:
command:
- sh
- "-c"
- "/dfray --mode proxy --port 20200"
- "/dd --mode proxy --port 20200"
ports:
- containerPort: 20200
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: deployment-dfray-worker
name: deployment-dd-worker
labels:
app: dfray-worker
app: dd-worker
spec:
replicas: 2
selector:
matchLabels:
app: dfray-worker
app: dd-worker
template:
metadata:
labels:
app: dfray-worker
service: service-dfray-worker
app: dd-worker
service: service-dd-worker
spec:
containers:
- name: dfray-worker
image: registry.ddbuild.io/dfray:2025-06-16-e
- name: dd-worker
image: registry.ddbuild.io/dd:2025-06-16-e
env:
- name: DATAFUSION_RAY_LOG_LEVEL
value: debug
- name: RUST_LOG
value: datafusion_bindings=info,cs3_query=info
value: datafusion_bindings=info,cs3_query=info,distributed_datafusion=debug
- name: HDQ_URL
value: http://hdq-creed.data-eng.all-clusters.local-dc.fabric.dog:6420
- name: MEM_THRESHOLD
Expand All @@ -93,7 +89,7 @@ spec:
command:
- sh
- "-c"
- "/dfray --mode worker --port 20201"
- "/dd --mode worker --port 20201"
ports:
- containerPort: 20201
---
Expand Down
39 changes: 0 additions & 39 deletions scripts/build_and_push_docker.sh

This file was deleted.

10 changes: 4 additions & 6 deletions scripts/launch_tpch_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ done
NUM_WORKERS=${NUM_WORKERS:-$DEFAULT_NUM_WORKERS}
TPCH_DATA_DIR=${TPCH_DATA_DIR:-$DEFAULT_TPCH_PATH}
LOG_DIR=${LOG_DIR:-$DEFAULT_LOG_PATH}
RUST_LOG=${RUST_LOG:-"info,distributed_datafusion=debug"}

# Validate inputs
if [ "$NUM_WORKERS" -lt 1 ]; then
Expand Down Expand Up @@ -116,8 +117,7 @@ echo " - Log Directory: $LOG_DIR"
echo

# Define environment variables
export DATAFUSION_RAY_LOG_LEVEL=trace
export DFRAY_TABLES="customer:parquet:${TPCH_DATA_DIR}/customer.parquet,\
export DD_TABLES="customer:parquet:${TPCH_DATA_DIR}/customer.parquet,\
lineitem:parquet:${TPCH_DATA_DIR}/lineitem.parquet,\
nation:parquet:${TPCH_DATA_DIR}/nation.parquet,\
orders:parquet:${TPCH_DATA_DIR}/orders.parquet,\
Expand Down Expand Up @@ -147,8 +147,7 @@ for ((i = 0; i < NUM_WORKERS; i++)); do
WORKER_NAME="worker$((i + 1))"
LOG_FILE="${LOG_DIR}/${WORKER_NAME}.log"
echo " Starting $WORKER_NAME on port $PORT..."
#env DATAFUSION_RAY_LOG_LEVEL="$DATAFUSION_RAY_LOG_LEVEL" DFRAY_TABLES="$DFRAY_TABLES" ./target/release/distributed-datafusion --mode worker --port $PORT >"$LOG_FILE" 2>&1 &
env RUST_BACKTRACE=1 DATAFUSION_RAY_LOG_LEVEL="$DATAFUSION_RAY_LOG_LEVEL" DFRAY_TABLES="$DFRAY_TABLES" ./target/debug/distributed-datafusion --mode worker --port $PORT >"$LOG_FILE" 2>&1 &
env RUST_LOG="$RUST_LOG" RUST_BACKTRACE=1 DD_TABLES="$DD_TABLES" ./target/debug/distributed-datafusion --mode worker --port $PORT >"$LOG_FILE" 2>&1 &
WORKER_PIDS[$i]=$!
WORKER_ADDRESSES[$i]="localhost:${PORT}"
done
Expand All @@ -167,8 +166,7 @@ WORKER_ADDRESSES_STR=$(
echo "Starting proxy on port 20200..."
echo "Connecting to workers: $WORKER_ADDRESSES_STR"
PROXY_LOG="${LOG_DIR}/proxy.log"
#env DATAFUSION_RAY_LOG_LEVEL="$DATAFUSION_RAY_LOG_LEVEL" DFRAY_TABLES="$DFRAY_TABLES" DFRAY_WORKER_ADDRESSES="$WORKER_ADDRESSES_STR" ./target/release/distributed-datafusion --mode proxy --port 20200 >"$PROXY_LOG" 2>&1 &
env RUST_BACKTRACE=1 DATAFUSION_RAY_LOG_LEVEL="$DATAFUSION_RAY_LOG_LEVEL" DFRAY_TABLES="$DFRAY_TABLES" DFRAY_WORKER_ADDRESSES="$WORKER_ADDRESSES_STR" ./target/debug/distributed-datafusion --mode proxy --port 20200 >"$PROXY_LOG" 2>&1 &
env RUST_LOG="$RUST_LOG" RUST_BACKTRACE=1 DD_TABLES="$DD_TABLES" DD_WORKER_ADDRESSES="$WORKER_ADDRESSES_STR" ./target/debug/distributed-datafusion --mode proxy --port 20200 >"$PROXY_LOG" 2>&1 &
PROXY_PID=$!

echo
Expand Down
2 changes: 1 addition & 1 deletion scripts/python_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ maturin develop

python tpch/make_data.py $TPCH_SCALING_FACTOR $TPCH_DATA_PATH

DATAFUSION_RAY_LOG_LEVEL=debug RAY_COLOR_PREFIX=1 RAY_DEDUP_LOGS=0 python tpch/tpcbench.py --data=file:///$TPCH_DATA_PATH/ --concurrency 3 --partitions-per-processor 2 --batch-size=8192 --worker-pool-min=20 --validate
RUST_LOG=distributed_datafusion=debug python tpch/tpcbench.py --data=file:///$TPCH_DATA_PATH/ --concurrency 3 --partitions-per-processor 2 --batch-size=8192 --worker-pool-min=20 --validate
Loading
Loading