Skip to content

Commit c3b5906

Browse files
committed
[Build] POC: Add UC OSS integration test workflow
Add a GitHub Actions workflow that:

1. Builds Unity Catalog OSS server from source
2. Starts a local UC server with managed tables enabled
3. Creates catalog, schema, and managed tables
4. Publishes Delta to local Maven (publishM2)
5. Runs unity-catalog-commit-coordinator-integration-tests.py via
   spark-submit --packages (validates POM dependency resolution)

This is a POC to validate the end-to-end flow.

Made-with: Cursor
1 parent 74de7ab commit c3b5906

File tree

1 file changed

+176
-0
lines changed

1 file changed

+176
-0
lines changed
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
# POC workflow: validates the end-to-end Delta <-> Unity Catalog OSS
# commit-coordinator flow by building a local UC server, creating managed
# tables, and running the integration test suite via spark-submit.
name: "Unity Catalog Integration Test"

on:
  push:
    paths-ignore:
      - '**.md'
      - '**.txt'
  pull_request:
    paths-ignore:
      - '**.md'
      - '**.txt'

jobs:
  test:
    name: "UC Commit Coordinator Integration Test"
    runs-on: ubuntu-24.04
    env:
      UC_VERSION: "0.4.0"
      CATALOG_NAME: "delta_test"
      SCHEMA: "default"
      MANAGED_CC_TABLE: "managed_cc_table"
      MANAGED_NON_CC_TABLE: "managed_non_cc_table"
    steps:
      # Use the Node 20 releases of these actions; the v3-era releases run
      # on Node 16, which GitHub has deprecated.
      - uses: actions/checkout@v4

      - name: Install Java 17
        uses: actions/setup-java@v4
        with:
          distribution: "zulu"
          java-version: "17"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not parse it as the float 3.1.
          python-version: '3.10'

      - name: Cache Scala, SBT
        uses: actions/cache@v4
        with:
          path: |
            ~/.sbt
            ~/.ivy2
            ~/.cache/coursier
          key: delta-sbt-cache-uc-integration

      # Step 1: Build UC OSS server from source
      - name: Clone and build Unity Catalog OSS
        run: |
          git clone --depth 1 --branch "v${UC_VERSION}" https://github.com/unitycatalog/unitycatalog.git /tmp/unitycatalog
          cd /tmp/unitycatalog
          build/sbt server/assembly

      # Step 2: Start UC server. The server is backgrounded with `&`;
      # processes started in a step keep running for later steps in the job.
      - name: Start Unity Catalog server
        run: |
          cd /tmp/unitycatalog

          # Start UC server with managed tables enabled
          UC_SERVER_DIR=$(mktemp -d)
          cat > "${UC_SERVER_DIR}/server.properties" <<EOF
          server.env=test
          server.managed-table.enabled=true
          storage-root.tables=${UC_SERVER_DIR}/ucroot
          EOF

          bin/start-uc-server \
            --config-file "${UC_SERVER_DIR}/server.properties" \
            --port 8080 &

          # Poll the REST API (up to 30s) so later steps see a live server.
          echo "Waiting for UC server to start..."
          for i in $(seq 1 30); do
            if curl -s http://localhost:8080/api/2.1/unity-catalog/catalogs > /dev/null 2>&1; then
              echo "UC server is ready"
              break
            fi
            if [ "$i" -eq 30 ]; then
              echo "UC server failed to start"
              exit 1
            fi
            sleep 1
          done

      # Step 3: Create catalog and schema via the UC REST API.
      - name: Set up UC catalog and tables
        run: |
          UC_BASE="http://localhost:8080/api/2.1/unity-catalog"

          # Create catalog
          curl -s -X POST "${UC_BASE}/catalogs" \
            -H "Content-Type: application/json" \
            -d "{\"name\": \"${CATALOG_NAME}\", \"comment\": \"Delta integration test catalog\"}"

          # Create schema
          curl -s -X POST "${UC_BASE}/schemas" \
            -H "Content-Type: application/json" \
            -d "{\"name\": \"${SCHEMA}\", \"catalog_name\": \"${CATALOG_NAME}\"}"

          # Create managed tables (commit-coordinator-owned and non-owned)
          # These are created via Spark SQL since managed table creation requires Delta metadata
          echo "Catalog and schema created successfully"

      # Step 4: Build and publish Delta to the local Maven repo (~/.m2) so
      # that --packages resolution in later steps can find the snapshot.
      - name: Publish Delta to local Maven
        run: build/sbt publishM2

      # Step 5: Create managed tables via PySpark (needs Delta + UC packages)
      - name: Create managed tables
        env:
          CATALOG_URI: "http://localhost:8080"
          CATALOG_TOKEN: "static-token"
        run: |
          DELTA_VERSION=$(grep 'version :=' version.sbt | sed 's/.*"\(.*\)"/\1/')
          SPARK_FULL_VERSION=$(python3 project/scripts/get_spark_version_info.py --get-field default fullVersion | tr -d '"')
          SPARK_SHORT_VERSION=$(python3 project/scripts/get_spark_version_info.py --get-field default packageSuffix | tr -d '"')

          pip install pyspark==${SPARK_FULL_VERSION}

          PACKAGES="io.delta:delta-spark${SPARK_SHORT_VERSION}_2.13:${DELTA_VERSION}"
          PACKAGES="${PACKAGES},io.unitycatalog:unitycatalog-spark_2.13:${UC_VERSION}"
          # BUGFIX: PACKAGES was previously computed but never handed to the
          # Spark session below, so the Delta/UC jars were never on the
          # classpath. The heredoc is single-quoted (no shell expansion), so
          # export the variable and read it from the environment in Python.
          export PACKAGES

          python3 - <<'PYEOF'
          import os
          from pyspark.sql import SparkSession

          catalog = os.environ["CATALOG_NAME"]
          schema = os.environ["SCHEMA"]
          cc_table = os.environ["MANAGED_CC_TABLE"]
          non_cc_table = os.environ["MANAGED_NON_CC_TABLE"]

          spark = SparkSession.builder \
              .appName("create_uc_tables") \
              .master("local[*]") \
              .config("spark.jars.packages", os.environ["PACKAGES"]) \
              .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
              .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
              .config(f"spark.sql.catalog.{catalog}", "io.unitycatalog.spark.UCSingleCatalog") \
              .config(f"spark.sql.catalog.{catalog}.uri", os.environ["CATALOG_URI"]) \
              .config(f"spark.sql.catalog.{catalog}.token", os.environ["CATALOG_TOKEN"]) \
              .getOrCreate()

          # Create managed tables
          spark.sql(f"""
              CREATE TABLE {catalog}.{schema}.{cc_table} (id INT)
              USING DELTA
              TBLPROPERTIES ('delta.enableCatalogManagedCommit' = 'true')
          """)

          spark.sql(f"""
              CREATE TABLE {catalog}.{schema}.{non_cc_table} (id INT)
              USING DELTA
          """)

          # Seed initial data
          spark.createDataFrame([(1,), (2,), (3,)], ["id"]).write.insertInto(
              f"{catalog}.{schema}.{cc_table}")
          spark.createDataFrame([(1,), (2,), (3,)], ["id"]).write.insertInto(
              f"{catalog}.{schema}.{non_cc_table}")

          print("SUCCESS: Managed tables created and seeded")
          spark.stop()
          PYEOF

      # Step 6: Run UC commit coordinator integration tests via
      # spark-submit --packages (validates POM dependency resolution).
      - name: Run UC commit coordinator integration tests
        env:
          CATALOG_URI: "http://localhost:8080"
          CATALOG_TOKEN: "static-token"
        run: |
          DELTA_VERSION=$(grep 'version :=' version.sbt | sed 's/.*"\(.*\)"/\1/')
          SPARK_SHORT_VERSION=$(python3 project/scripts/get_spark_version_info.py --get-field default packageSuffix | tr -d '"')

          PACKAGES="io.delta:delta-spark${SPARK_SHORT_VERSION}_2.13:${DELTA_VERSION}"
          PACKAGES="${PACKAGES},io.unitycatalog:unitycatalog-spark_2.13:${UC_VERSION}"

          spark-submit \
            --driver-class-path python/delta/testing \
            --packages "${PACKAGES}" \
            python/delta/integration_tests/unity-catalog-commit-coordinator-integration-tests.py

0 commit comments

Comments
 (0)