Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ target
/release
.devbox
/.envrc
*.credentials
19 changes: 18 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,24 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased](https://github.com/altinity/ice/compare/v0.4.0...master)
## [Unreleased](https://github.com/altinity/ice/compare/v0.5.0...master)

## [0.5.0](https://github.com/altinity/ice/compare/v0.4.0...v0.5.0)

### Added
- ice: Support for `ice insert` in watch mode. See [examples/s3watch](examples/s3watch) for details.
- ice-rest-catalog: `MANIFEST_COMPACTION`, `DATA_COMPACTION`, `SNAPSHOT_CLEANUP` and `ORPHAN_CLEANUP` maintenance routines. These
can be enabled via the `maintenance` section of ice-rest-catalog.yaml or performed ad hoc via `ice-rest-catalog perform-maintenance`.
- ice: `ice delete` command.

### Changed
- ice: `ice delete-table` no longer deletes any data unless `--purge` is specified.
- ice-rest-catalog: catalog maintenance config section. `snapshotTTLInDays` moved to `maintenance.snapshotTTLInDays`.

### Fixed
- ice: Partitioning metadata missing when data is inserted with `--no-copy` or `--s3-copy-object`.
- ice: `NULLS_FIRST`/`NULLS_LAST` being ignored when sorting.
- ice: Path construction in `localfileio`.

## [0.4.0](https://github.com/altinity/ice/compare/v0.3.1...v0.4.0)

Expand Down
5 changes: 5 additions & 0 deletions examples/s3watch/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/.ice-rest-catalog.yaml
/.terraform.lock.hcl
/terraform.tfstate
/terraform.tfstate.backup
/tf.export
6 changes: 6 additions & 0 deletions examples/s3watch/.ice-rest-catalog.envsubst.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
uri: jdbc:sqlite:file:data/ice-rest-catalog/db.sqlite?journal_mode=WAL&synchronous=OFF&journal_size_limit=500
warehouse: s3://${CATALOG_S3_BUCKET_NAME}
s3:
region: ${AWS_REGION}
bearerTokens:
- value: foo
3 changes: 3 additions & 0 deletions examples/s3watch/.ice.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
uri: http://localhost:5000
bearerToken: foo
httpCacheDir: data/ice/http/cache
108 changes: 108 additions & 0 deletions examples/s3watch/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# examples/s3watch

This example demonstrates how ice can be used to continuously add files to the catalog as they are uploaded to an S3
bucket. It works by making ice listen for S3 object creation events via an SQS queue.

1. Allocate AWS resources.

```shell
devbox shell

# auth into AWS
#
# either create a file named "aws.credentials" containing
#
# export AWS_ACCESS_KEY_ID=...
# export AWS_SECRET_ACCESS_KEY=...
# export AWS_SESSION_TOKEN=...
# export AWS_REGION=us-west-2
#
# and then load it as shown below or use any other method
source aws.credentials

# create s3 bucket + configure notification queue
terraform init
terraform apply

# save terraform output for easy loading
echo $"
export CATALOG_S3_BUCKET_NAME=$(terraform output -raw s3_bucket_name)
export CATALOG_SQS_QUEUE_URL=$(terraform output -raw sqs_queue_url)
" > tf.export
```

2. Start Iceberg REST Catalog.

```shell
devbox shell

source aws.credentials
source tf.export

# generate config
cat .ice-rest-catalog.envsubst.yaml | \
envsubst -no-unset -no-empty > .ice-rest-catalog.yaml

# run
ice-rest-catalog
```

3. Start `ice insert` in watch mode.

```shell
devbox shell

source aws.credentials # for sqs:ReceiveMessage
source tf.export

# run
ice insert flowers.iris -p --no-copy --skip-duplicates \
s3://$CATALOG_S3_BUCKET_NAME/flowers/iris/external-data/*.parquet \
--watch="$CATALOG_SQS_QUEUE_URL"
```

4. Put some data into the S3 bucket any way you want, e.g. using `aws s3 cp`.

```shell
devbox shell

source aws.credentials
source tf.export

# upload data to s3
aws s3 cp iris.parquet s3://$CATALOG_S3_BUCKET_NAME/flowers/iris/external-data/
```

5. Query data from ClickHouse.

```shell
devbox shell

source tf.export

clickhouse local -q $"
SET allow_experimental_database_iceberg = 1;

-- (re)create iceberg db
DROP DATABASE IF EXISTS ice;

CREATE DATABASE ice
ENGINE = DataLakeCatalog('http://localhost:5000')
SETTINGS catalog_type = 'rest',
auth_header = 'Authorization: Bearer foo',
warehouse = 's3://${CATALOG_S3_BUCKET_NAME}';

select count(*) from ice.\`flowers.iris\`;
"
```

6. Clean up.

```shell
devbox shell

source aws.credentials

terraform destroy
rm -rf data/
```
17 changes: 17 additions & 0 deletions examples/s3watch/devbox.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"$schema": "https://raw.githubusercontent.com/jetify-com/devbox/0.10.7/.schema/devbox.schema.json",
"packages": [
"awscli2@latest",
"terraform@1.12.2",
"jdk-headless@21.0.7+6"
],
"env": {
"AT": "ice:examples/s3watch"
},
"shell": {
"init_hook": [
"export PATH=$(pwd):$(pwd)/.devbox/bin:$PATH",
"[ -f .devbox/bin/clickhouse ] || (curl https://clickhouse.com/ | sh && mv clickhouse .devbox/bin/)"
]
}
}
156 changes: 156 additions & 0 deletions examples/s3watch/devbox.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
{
"lockfile_version": "1",
"packages": {
"awscli2@latest": {
"last_modified": "2025-08-08T08:05:48Z",
"resolved": "github:NixOS/nixpkgs/a3f3e3f2c983e957af6b07a1db98bafd1f87b7a1#awscli2",
"source": "devbox-search",
"version": "2.28.1",
"systems": {
"aarch64-darwin": {
"outputs": [
{
"name": "out",
"path": "/nix/store/rgavcd6nf9ycmm53r86js1n8zv6k5717-awscli2-2.28.1",
"default": true
},
{
"name": "dist",
"path": "/nix/store/bcqbw14w0689hslsnd9r1vqh8445n5d3-awscli2-2.28.1-dist"
}
],
"store_path": "/nix/store/rgavcd6nf9ycmm53r86js1n8zv6k5717-awscli2-2.28.1"
},
"aarch64-linux": {
"outputs": [
{
"name": "out",
"path": "/nix/store/agnnh10lr6xhvvyy9k74bbsiwaxd18ma-awscli2-2.28.1",
"default": true
},
{
"name": "dist",
"path": "/nix/store/bjn51pqs4q12ajiq8idsv4pxfi85zqyv-awscli2-2.28.1-dist"
}
],
"store_path": "/nix/store/agnnh10lr6xhvvyy9k74bbsiwaxd18ma-awscli2-2.28.1"
},
"x86_64-darwin": {
"outputs": [
{
"name": "out",
"path": "/nix/store/fy6lwzj66lhbzqx4023jpkl78l5sb3l4-awscli2-2.28.1",
"default": true
},
{
"name": "dist",
"path": "/nix/store/72zij4gvx5rhr4l8jn0d4bas35brf19h-awscli2-2.28.1-dist"
}
],
"store_path": "/nix/store/fy6lwzj66lhbzqx4023jpkl78l5sb3l4-awscli2-2.28.1"
},
"x86_64-linux": {
"outputs": [
{
"name": "out",
"path": "/nix/store/a830f5ksp5fa8v8fl0bw29amwazwbf50-awscli2-2.28.1",
"default": true
},
{
"name": "dist",
"path": "/nix/store/6ddshy6z4h24brknz0a1ahjdvbm7yl6s-awscli2-2.28.1-dist"
}
],
"store_path": "/nix/store/a830f5ksp5fa8v8fl0bw29amwazwbf50-awscli2-2.28.1"
}
}
},
"github:NixOS/nixpkgs/nixpkgs-unstable": {
"resolved": "github:NixOS/nixpkgs/32f313e49e42f715491e1ea7b306a87c16fe0388?lastModified=1755268003&narHash=sha256-nNaeJjo861wFR0tjHDyCnHs1rbRtrMgxAKMoig9Sj%2Fw%3D"
},
"jdk-headless@21.0.7+6": {
"last_modified": "2025-07-28T17:09:23Z",
"resolved": "github:NixOS/nixpkgs/648f70160c03151bc2121d179291337ad6bc564b#jdk21_headless",
"source": "devbox-search",
"version": "21.0.7+6",
"systems": {
"aarch64-linux": {
"outputs": [
{
"name": "out",
"path": "/nix/store/sr7lg8i0c9f999klq8j7zpajwhx5y8j5-openjdk-headless-21.0.7+6",
"default": true
},
{
"name": "debug",
"path": "/nix/store/7j7r7k6rj9lb33xsmk1rn00dj8kq1ajw-openjdk-headless-21.0.7+6-debug"
}
],
"store_path": "/nix/store/sr7lg8i0c9f999klq8j7zpajwhx5y8j5-openjdk-headless-21.0.7+6"
},
"x86_64-linux": {
"outputs": [
{
"name": "out",
"path": "/nix/store/b8272lgswnw20fkd3b8av2ghqi60m725-openjdk-headless-21.0.7+6",
"default": true
},
{
"name": "debug",
"path": "/nix/store/cjziiakfgvacvhdjg91bdcnz4zyh1q84-openjdk-headless-21.0.7+6-debug"
}
],
"store_path": "/nix/store/b8272lgswnw20fkd3b8av2ghqi60m725-openjdk-headless-21.0.7+6"
}
}
},
"terraform@1.12.2": {
"last_modified": "2025-07-28T17:09:23Z",
"resolved": "github:NixOS/nixpkgs/648f70160c03151bc2121d179291337ad6bc564b#terraform",
"source": "devbox-search",
"version": "1.12.2",
"systems": {
"aarch64-darwin": {
"outputs": [
{
"name": "out",
"path": "/nix/store/40gjbhfk5r4njbvkny3jcn8dz9slr138-terraform-1.12.2",
"default": true
}
],
"store_path": "/nix/store/40gjbhfk5r4njbvkny3jcn8dz9slr138-terraform-1.12.2"
},
"aarch64-linux": {
"outputs": [
{
"name": "out",
"path": "/nix/store/bvi0sf7qi75lk7aczgwp1bq811py9rj0-terraform-1.12.2",
"default": true
}
],
"store_path": "/nix/store/bvi0sf7qi75lk7aczgwp1bq811py9rj0-terraform-1.12.2"
},
"x86_64-darwin": {
"outputs": [
{
"name": "out",
"path": "/nix/store/km3x95bvr81n0lpyfr7nf00vifl3q6hy-terraform-1.12.2",
"default": true
}
],
"store_path": "/nix/store/km3x95bvr81n0lpyfr7nf00vifl3q6hy-terraform-1.12.2"
},
"x86_64-linux": {
"outputs": [
{
"name": "out",
"path": "/nix/store/vxazqkmsxlc6fgx3kl53jxiwjszghwrm-terraform-1.12.2",
"default": true
}
],
"store_path": "/nix/store/vxazqkmsxlc6fgx3kl53jxiwjszghwrm-terraform-1.12.2"
}
}
}
}
}
Binary file added examples/s3watch/iris.parquet
Binary file not shown.
67 changes: 67 additions & 0 deletions examples/s3watch/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# AWS resources for the s3watch example: an S3 bucket whose object-created
# events for ".parquet" keys are delivered to an SQS queue.

terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 6.0"
    }
  }
}

# No explicit settings: the provider falls back to its default configuration
# sources (presumably the AWS_* environment variables exported per the README).
provider "aws" {}

locals {
  # Prefixes only; the final names are generated via bucket_prefix/name_prefix.
  sqs_queue_prefix = "ice-s3watch"
  s3_bucket_prefix = "ice-s3watch"
}

resource "aws_s3_bucket" "this" {
  bucket_prefix = local.s3_bucket_prefix
  # Lets `terraform destroy` remove the bucket even when it still holds objects.
  force_destroy = true
}

resource "aws_sqs_queue" "this" {
  name_prefix = local.sqs_queue_prefix
}

# Attaches the access policy below to the queue.
resource "aws_sqs_queue_policy" "this" {
  queue_url = aws_sqs_queue.this.id
  policy    = data.aws_iam_policy_document.queue.json
}

# Allows sqs:SendMessage on the queue for any principal, but only when the
# request's aws:SourceArn equals the ARN of the bucket defined above.
data "aws_iam_policy_document" "queue" {
  statement {
    effect = "Allow"

    principals {
      type        = "*"
      identifiers = ["*"]
    }

    actions   = ["sqs:SendMessage"]
    resources = [aws_sqs_queue.this.arn]

    condition {
      test     = "ArnEquals"
      variable = "aws:SourceArn"
      values   = [aws_s3_bucket.this.arn]
    }
  }
}

resource "aws_s3_bucket_notification" "this" {
  bucket = aws_s3_bucket.this.id

  queue {
    queue_arn     = aws_sqs_queue.this.arn
    events        = ["s3:ObjectCreated:*"]
    filter_suffix = ".parquet"
  }

  # S3 validates that it can deliver to the queue when the notification is
  # created. Nothing in this resource references the queue policy, so without
  # an explicit dependency Terraform may create the notification before the
  # policy exists and the apply can fail with "Unable to validate the
  # following destination configurations".
  depends_on = [aws_sqs_queue_policy.this]
}

output "s3_bucket_name" {
  value = aws_s3_bucket.this.id
}

output "sqs_queue_url" {
  # Same attribute that is passed as queue_url to aws_sqs_queue_policy above.
  value = aws_sqs_queue.this.id
}
Loading