Skip to content

Commit 767a5da

Browse files
authored
Merge pull request #56 from Altinity/0.5.0-pre-release
0.5.0
2 parents c06a675 + a7312e0 commit 767a5da

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+2615
-447
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ target
66
/release
77
.devbox
88
/.envrc
9+
*.credentials

CHANGELOG.md

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,24 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8-
## [Unreleased](https://github.com/altinity/ice/compare/v0.4.0...master)
8+
## [Unreleased](https://github.com/altinity/ice/compare/v0.5.0...master)
9+
10+
## [0.5.0](https://github.com/altinity/ice/compare/v0.4.0...v0.5.0)
11+
12+
### Added
13+
- ice: Support for `ice insert` in watch mode. See [examples/s3watch](examples/s3watch) for details.
14+
- ice-rest-catalog: `MANIFEST_COMPACTION`, `DATA_COMPACTION`, `SNAPSHOT_CLEANUP` and `ORPHAN_CLEANUP` maintenance routines. These
15+
can either be enabled via the `maintenance` section of ice-rest-catalog.yaml or performed ad hoc via `ice-rest-catalog perform-maintenance`.
16+
- ice: `ice delete` command.
17+
18+
### Changed
19+
- ice: `ice delete-table` no longer deletes any data unless `--purge` is specified.
20+
- ice-rest-catalog: catalog maintenance config section. `snapshotTTLInDays` moved to `maintenance.snapshotTTLInDays`.
21+
22+
### Fixed
23+
- ice: Partitioning metadata missing when data is inserted with `--no-copy` or `--s3-copy-object`.
24+
- ice: `NULLS_FIRST`/`NULLS_LAST` being ignored when sorting.
25+
- ice: Path construction in `localfileio`.
926

1027
## [0.4.0](https://github.com/altinity/ice/compare/v0.3.1...v0.4.0)
1128

examples/s3watch/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/.ice-rest-catalog.yaml
2+
/.terraform.lock.hcl
3+
/terraform.tfstate
4+
/terraform.tfstate.backup
5+
/tf.export
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
uri: jdbc:sqlite:file:data/ice-rest-catalog/db.sqlite?journal_mode=WAL&synchronous=OFF&journal_size_limit=500
2+
warehouse: s3://${CATALOG_S3_BUCKET_NAME}
3+
s3:
4+
region: ${AWS_REGION}
5+
bearerTokens:
6+
- value: foo

examples/s3watch/.ice.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
uri: http://localhost:5000
2+
bearerToken: foo
3+
httpCacheDir: data/ice/http/cache

examples/s3watch/README.md

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# examples/s3watch
2+
3+
This example demonstrates how ice can be used to continuously add files to the catalog as they are uploaded to an S3
4+
bucket. It works by making ice listen for S3 object creation events via an SQS queue.
5+
6+
1. Allocate AWS resources.
7+
8+
```shell
9+
devbox shell
10+
11+
# auth into AWS
12+
#
13+
# either create a file named "aws.credentials" containing
14+
#
15+
# export AWS_ACCESS_KEY_ID=...
16+
# export AWS_SECRET_ACCESS_KEY=...
17+
# export AWS_SESSION_TOKEN=...
18+
# export AWS_REGION=us-west-2
19+
#
20+
# and then load it as shown below or use any other method
21+
source aws.credentials
22+
23+
# create s3 bucket + configure notification queue
24+
terraform init
25+
terraform apply
26+
27+
# save terraform output for easy loading
28+
echo $"
29+
export CATALOG_S3_BUCKET_NAME=$(terraform output -raw s3_bucket_name)
30+
export CATALOG_SQS_QUEUE_URL=$(terraform output -raw sqs_queue_url)
31+
" > tf.export
32+
```
33+
34+
2. Start Iceberg REST Catalog.
35+
36+
```shell
37+
devbox shell
38+
39+
source aws.credentials
40+
source tf.export
41+
42+
# generate config
43+
cat .ice-rest-catalog.envsubst.yaml | \
44+
envsubst -no-unset -no-empty > .ice-rest-catalog.yaml
45+
46+
# run
47+
ice-rest-catalog
48+
```
49+
50+
3. Start `ice insert` in watch mode.
51+
52+
```shell
53+
devbox shell
54+
55+
source aws.credentials # for sqs:ReceiveMessage
56+
source tf.export
57+
58+
# run
59+
ice insert flowers.iris -p --no-copy --skip-duplicates \
60+
s3://$CATALOG_S3_BUCKET_NAME/flowers/iris/external-data/*.parquet \
61+
--watch="$CATALOG_SQS_QUEUE_URL"
62+
```
63+
64+
4. Put some data into the S3 bucket any way you want, e.g. using `aws s3 cp`.
65+
66+
```shell
67+
devbox shell
68+
69+
source aws.credentials
70+
source tf.export
71+
72+
# upload data to s3
73+
aws s3 cp iris.parquet s3://$CATALOG_S3_BUCKET_NAME/flowers/iris/external-data/
74+
```
75+
76+
5. Query data from ClickHouse.
77+
78+
```shell
79+
devbox shell
80+
81+
source tf.export
82+
83+
clickhouse local -q $"
84+
SET allow_experimental_database_iceberg = 1;
85+
86+
-- (re)create iceberg db
87+
DROP DATABASE IF EXISTS ice;
88+
89+
CREATE DATABASE ice
90+
ENGINE = DataLakeCatalog('http://localhost:5000')
91+
SETTINGS catalog_type = 'rest',
92+
auth_header = 'Authorization: Bearer foo',
93+
warehouse = 's3://${CATALOG_S3_BUCKET_NAME}';
94+
95+
select count(*) from ice.\`flowers.iris\`;
96+
"
97+
```
98+
99+
6. Clean up.
100+
101+
```shell
102+
devbox shell
103+
104+
source aws.credentials
105+
106+
terraform destroy
107+
rm -rf data/
108+
```

examples/s3watch/devbox.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"$schema": "https://raw.githubusercontent.com/jetify-com/devbox/0.10.7/.schema/devbox.schema.json",
3+
"packages": [
4+
"awscli2@latest",
5+
6+
7+
],
8+
"env": {
9+
"AT": "ice:examples/s3watch"
10+
},
11+
"shell": {
12+
"init_hook": [
13+
"export PATH=$(pwd):$(pwd)/.devbox/bin:$PATH",
14+
"[ -f .devbox/bin/clickhouse ] || (curl https://clickhouse.com/ | sh && mv clickhouse .devbox/bin/)"
15+
]
16+
}
17+
}

examples/s3watch/devbox.lock

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
{
2+
"lockfile_version": "1",
3+
"packages": {
4+
"awscli2@latest": {
5+
"last_modified": "2025-08-08T08:05:48Z",
6+
"resolved": "github:NixOS/nixpkgs/a3f3e3f2c983e957af6b07a1db98bafd1f87b7a1#awscli2",
7+
"source": "devbox-search",
8+
"version": "2.28.1",
9+
"systems": {
10+
"aarch64-darwin": {
11+
"outputs": [
12+
{
13+
"name": "out",
14+
"path": "/nix/store/rgavcd6nf9ycmm53r86js1n8zv6k5717-awscli2-2.28.1",
15+
"default": true
16+
},
17+
{
18+
"name": "dist",
19+
"path": "/nix/store/bcqbw14w0689hslsnd9r1vqh8445n5d3-awscli2-2.28.1-dist"
20+
}
21+
],
22+
"store_path": "/nix/store/rgavcd6nf9ycmm53r86js1n8zv6k5717-awscli2-2.28.1"
23+
},
24+
"aarch64-linux": {
25+
"outputs": [
26+
{
27+
"name": "out",
28+
"path": "/nix/store/agnnh10lr6xhvvyy9k74bbsiwaxd18ma-awscli2-2.28.1",
29+
"default": true
30+
},
31+
{
32+
"name": "dist",
33+
"path": "/nix/store/bjn51pqs4q12ajiq8idsv4pxfi85zqyv-awscli2-2.28.1-dist"
34+
}
35+
],
36+
"store_path": "/nix/store/agnnh10lr6xhvvyy9k74bbsiwaxd18ma-awscli2-2.28.1"
37+
},
38+
"x86_64-darwin": {
39+
"outputs": [
40+
{
41+
"name": "out",
42+
"path": "/nix/store/fy6lwzj66lhbzqx4023jpkl78l5sb3l4-awscli2-2.28.1",
43+
"default": true
44+
},
45+
{
46+
"name": "dist",
47+
"path": "/nix/store/72zij4gvx5rhr4l8jn0d4bas35brf19h-awscli2-2.28.1-dist"
48+
}
49+
],
50+
"store_path": "/nix/store/fy6lwzj66lhbzqx4023jpkl78l5sb3l4-awscli2-2.28.1"
51+
},
52+
"x86_64-linux": {
53+
"outputs": [
54+
{
55+
"name": "out",
56+
"path": "/nix/store/a830f5ksp5fa8v8fl0bw29amwazwbf50-awscli2-2.28.1",
57+
"default": true
58+
},
59+
{
60+
"name": "dist",
61+
"path": "/nix/store/6ddshy6z4h24brknz0a1ahjdvbm7yl6s-awscli2-2.28.1-dist"
62+
}
63+
],
64+
"store_path": "/nix/store/a830f5ksp5fa8v8fl0bw29amwazwbf50-awscli2-2.28.1"
65+
}
66+
}
67+
},
68+
"github:NixOS/nixpkgs/nixpkgs-unstable": {
69+
"resolved": "github:NixOS/nixpkgs/32f313e49e42f715491e1ea7b306a87c16fe0388?lastModified=1755268003&narHash=sha256-nNaeJjo861wFR0tjHDyCnHs1rbRtrMgxAKMoig9Sj%2Fw%3D"
70+
},
71+
72+
"last_modified": "2025-07-28T17:09:23Z",
73+
"resolved": "github:NixOS/nixpkgs/648f70160c03151bc2121d179291337ad6bc564b#jdk21_headless",
74+
"source": "devbox-search",
75+
"version": "21.0.7+6",
76+
"systems": {
77+
"aarch64-linux": {
78+
"outputs": [
79+
{
80+
"name": "out",
81+
"path": "/nix/store/sr7lg8i0c9f999klq8j7zpajwhx5y8j5-openjdk-headless-21.0.7+6",
82+
"default": true
83+
},
84+
{
85+
"name": "debug",
86+
"path": "/nix/store/7j7r7k6rj9lb33xsmk1rn00dj8kq1ajw-openjdk-headless-21.0.7+6-debug"
87+
}
88+
],
89+
"store_path": "/nix/store/sr7lg8i0c9f999klq8j7zpajwhx5y8j5-openjdk-headless-21.0.7+6"
90+
},
91+
"x86_64-linux": {
92+
"outputs": [
93+
{
94+
"name": "out",
95+
"path": "/nix/store/b8272lgswnw20fkd3b8av2ghqi60m725-openjdk-headless-21.0.7+6",
96+
"default": true
97+
},
98+
{
99+
"name": "debug",
100+
"path": "/nix/store/cjziiakfgvacvhdjg91bdcnz4zyh1q84-openjdk-headless-21.0.7+6-debug"
101+
}
102+
],
103+
"store_path": "/nix/store/b8272lgswnw20fkd3b8av2ghqi60m725-openjdk-headless-21.0.7+6"
104+
}
105+
}
106+
},
107+
108+
"last_modified": "2025-07-28T17:09:23Z",
109+
"resolved": "github:NixOS/nixpkgs/648f70160c03151bc2121d179291337ad6bc564b#terraform",
110+
"source": "devbox-search",
111+
"version": "1.12.2",
112+
"systems": {
113+
"aarch64-darwin": {
114+
"outputs": [
115+
{
116+
"name": "out",
117+
"path": "/nix/store/40gjbhfk5r4njbvkny3jcn8dz9slr138-terraform-1.12.2",
118+
"default": true
119+
}
120+
],
121+
"store_path": "/nix/store/40gjbhfk5r4njbvkny3jcn8dz9slr138-terraform-1.12.2"
122+
},
123+
"aarch64-linux": {
124+
"outputs": [
125+
{
126+
"name": "out",
127+
"path": "/nix/store/bvi0sf7qi75lk7aczgwp1bq811py9rj0-terraform-1.12.2",
128+
"default": true
129+
}
130+
],
131+
"store_path": "/nix/store/bvi0sf7qi75lk7aczgwp1bq811py9rj0-terraform-1.12.2"
132+
},
133+
"x86_64-darwin": {
134+
"outputs": [
135+
{
136+
"name": "out",
137+
"path": "/nix/store/km3x95bvr81n0lpyfr7nf00vifl3q6hy-terraform-1.12.2",
138+
"default": true
139+
}
140+
],
141+
"store_path": "/nix/store/km3x95bvr81n0lpyfr7nf00vifl3q6hy-terraform-1.12.2"
142+
},
143+
"x86_64-linux": {
144+
"outputs": [
145+
{
146+
"name": "out",
147+
"path": "/nix/store/vxazqkmsxlc6fgx3kl53jxiwjszghwrm-terraform-1.12.2",
148+
"default": true
149+
}
150+
],
151+
"store_path": "/nix/store/vxazqkmsxlc6fgx3kl53jxiwjszghwrm-terraform-1.12.2"
152+
}
153+
}
154+
}
155+
}
156+
}

examples/s3watch/iris.parquet

2.39 KB
Binary file not shown.

examples/s3watch/main.tf

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
terraform {
2+
required_providers {
3+
aws = {
4+
source = "hashicorp/aws"
5+
version = "~> 6.0"
6+
}
7+
}
8+
}
9+
10+
provider "aws" {}
11+
12+
locals {
13+
sqs_queue_prefix = "ice-s3watch"
14+
s3_bucket_prefix = "ice-s3watch"
15+
}
16+
17+
resource "aws_s3_bucket" "this" {
18+
bucket_prefix = local.s3_bucket_prefix
19+
force_destroy = true
20+
}
21+
22+
resource "aws_sqs_queue" "this" {
23+
name_prefix = local.sqs_queue_prefix
24+
}
25+
26+
resource "aws_sqs_queue_policy" "this" {
27+
queue_url = aws_sqs_queue.this.id
28+
policy = data.aws_iam_policy_document.queue.json
29+
}
30+
31+
data "aws_iam_policy_document" "queue" {
32+
statement {
33+
effect = "Allow"
34+
35+
principals {
36+
type = "*"
37+
identifiers = ["*"]
38+
}
39+
40+
actions = ["sqs:SendMessage"]
41+
resources = [aws_sqs_queue.this.arn]
42+
43+
condition {
44+
test = "ArnEquals"
45+
variable = "aws:SourceArn"
46+
values = [aws_s3_bucket.this.arn]
47+
}
48+
}
49+
}
50+
51+
resource "aws_s3_bucket_notification" "this" {
52+
bucket = aws_s3_bucket.this.id
53+
54+
queue {
55+
queue_arn = aws_sqs_queue.this.arn
56+
events = ["s3:ObjectCreated:*"]
57+
filter_suffix = ".parquet"
58+
}
59+
}
60+
61+
output "s3_bucket_name" {
62+
value = aws_s3_bucket.this.id
63+
}
64+
65+
output "sqs_queue_url" {
66+
value = aws_sqs_queue.this.id
67+
}

0 commit comments

Comments
 (0)