Skip to content

Commit 07587a3

Browse files
committed
Add support for environment context
1 parent 5da8186 commit 07587a3

8 files changed

Lines changed: 136 additions & 0 deletions

File tree

pyiceberg/environment_context.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from importlib.metadata import version
19+
20+
21+
class EnvironmentContext:
22+
_PROPERTIES: dict[str, str] = {
23+
"engine-name": "pyiceberg",
24+
"engine-version": version("pyiceberg"),
25+
}
26+
27+
def __init__(self) -> None:
28+
raise NotImplementedError("EnvironmentContext is a utility class and cannot be instantiated.")
29+
30+
@classmethod
31+
def get(cls) -> dict[str, str]:
32+
"""Return a read-only copy of all properties."""
33+
return cls._PROPERTIES.copy()
34+
35+
@classmethod
36+
def put(cls, key: str, value: str) -> None:
37+
"""Will add the given key/value pair in a global properties map."""
38+
cls._PROPERTIES[key] = value
39+
40+
@classmethod
41+
def remove(cls, key: str) -> str | None:
42+
"""Remove the key from the global properties map."""
43+
return cls._PROPERTIES.pop(key, None)

pyiceberg/table/snapshots.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
from pydantic import Field, PrivateAttr, model_serializer
2727

28+
from pyiceberg.environment_context import EnvironmentContext
2829
from pyiceberg.io import FileIO
2930
from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, _manifests
3031
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
@@ -402,6 +403,9 @@ def _update_totals(total_property: str, added_property: str, removed_property: s
402403
removed_property=REMOVED_EQUALITY_DELETES,
403404
)
404405

406+
for key, value in EnvironmentContext.get().items():
407+
summary.__setitem__(key, value)
408+
405409
return summary
406410

407411

tests/integration/test_deletes.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from pyspark.sql import SparkSession
2424

2525
from pyiceberg.catalog.rest import RestCatalog
26+
from pyiceberg.environment_context import EnvironmentContext
2627
from pyiceberg.exceptions import NoSuchTableError
2728
from pyiceberg.expressions import AlwaysTrue, EqualTo, LessThanOrEqual
2829
from pyiceberg.manifest import ManifestEntryStatus
@@ -480,6 +481,8 @@ def test_partitioned_table_positional_deletes_sequence_number(spark: SparkSessio
480481
"total-files-size": snapshots[2].summary["total-files-size"],
481482
"total-position-deletes": "1",
482483
"total-records": "4",
484+
"engine-name": "pyiceberg",
485+
"engine-version": EnvironmentContext.get().get("engine-version"),
483486
},
484487
)
485488

tests/integration/test_inspect_table.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pytest_lazy_fixtures import lf
2828

2929
from pyiceberg.catalog import Catalog
30+
from pyiceberg.environment_context import EnvironmentContext
3031
from pyiceberg.exceptions import NoSuchTableError
3132
from pyiceberg.expressions import (
3233
And,
@@ -277,6 +278,8 @@ def test_inspect_snapshots(
277278
("total-files-size", str(file_size)),
278279
("total-position-deletes", "0"),
279280
("total-equality-deletes", "0"),
281+
("engine-name", "pyiceberg"),
282+
("engine-version", EnvironmentContext.get().get("engine-version")),
280283
]
281284

282285
# Delete
@@ -290,6 +293,8 @@ def test_inspect_snapshots(
290293
("total-files-size", "0"),
291294
("total-position-deletes", "0"),
292295
("total-equality-deletes", "0"),
296+
("engine-name", "pyiceberg"),
297+
("engine-version", EnvironmentContext.get().get("engine-version")),
293298
]
294299

295300
lhs = spark.table(f"{identifier}.snapshots").toPandas()

tests/integration/test_writes/test_partitioned_writes.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from pyspark.sql import SparkSession
2626

2727
from pyiceberg.catalog import Catalog
28+
from pyiceberg.environment_context import EnvironmentContext
2829
from pyiceberg.exceptions import NoSuchTableError
2930
from pyiceberg.partitioning import PartitionField, PartitionSpec
3031
from pyiceberg.schema import Schema
@@ -498,6 +499,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
498499
"total-files-size": str(file_size),
499500
"total-position-deletes": "0",
500501
"total-records": "3",
502+
"engine-name": "pyiceberg",
503+
"engine-version": EnvironmentContext.get().get("engine-version"),
501504
}
502505

503506
assert summaries[1] == {
@@ -511,6 +514,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
511514
"total-files-size": str(file_size * 2),
512515
"total-position-deletes": "0",
513516
"total-records": "6",
517+
"engine-name": "pyiceberg",
518+
"engine-version": EnvironmentContext.get().get("engine-version"),
514519
}
515520
assert summaries[2] == {
516521
"removed-files-size": str(file_size * 2),
@@ -523,6 +528,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
523528
"total-files-size": "0",
524529
"total-data-files": "0",
525530
"total-records": "0",
531+
"engine-name": "pyiceberg",
532+
"engine-version": EnvironmentContext.get().get("engine-version"),
526533
}
527534
assert summaries[3] == {
528535
"changed-partition-count": "3",
@@ -535,6 +542,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
535542
"total-files-size": str(file_size),
536543
"total-data-files": "3",
537544
"total-records": "3",
545+
"engine-name": "pyiceberg",
546+
"engine-version": EnvironmentContext.get().get("engine-version"),
538547
}
539548
assert summaries[4] == {
540549
"changed-partition-count": "3",
@@ -547,6 +556,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
547556
"total-files-size": str(file_size * 2),
548557
"total-data-files": "6",
549558
"total-records": "6",
559+
"engine-name": "pyiceberg",
560+
"engine-version": EnvironmentContext.get().get("engine-version"),
550561
}
551562
assert "removed-files-size" in summaries[5]
552563
assert "total-files-size" in summaries[5]
@@ -561,6 +572,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
561572
"total-files-size": summaries[5]["total-files-size"],
562573
"total-data-files": "2",
563574
"total-records": "2",
575+
"engine-name": "pyiceberg",
576+
"engine-version": EnvironmentContext.get().get("engine-version"),
564577
}
565578
assert "added-files-size" in summaries[6]
566579
assert "total-files-size" in summaries[6]
@@ -575,6 +588,8 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro
575588
"total-files-size": summaries[6]["total-files-size"],
576589
"total-data-files": "4",
577590
"total-records": "4",
591+
"engine-name": "pyiceberg",
592+
"engine-version": EnvironmentContext.get().get("engine-version"),
578593
}
579594

580595

tests/integration/test_writes/test_writes.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from pyiceberg.catalog import Catalog, load_catalog
4545
from pyiceberg.catalog.hive import HiveCatalog
4646
from pyiceberg.catalog.sql import SqlCatalog
47+
from pyiceberg.environment_context import EnvironmentContext
4748
from pyiceberg.exceptions import CommitFailedException, NoSuchTableError
4849
from pyiceberg.expressions import And, EqualTo, GreaterThanOrEqual, In, LessThan, Not
4950
from pyiceberg.io.pyarrow import UnsupportedPyArrowTypeException, _dataframe_to_data_files
@@ -231,6 +232,8 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
231232
"total-files-size": str(file_size),
232233
"total-position-deletes": "0",
233234
"total-records": "3",
235+
"engine-name": "pyiceberg",
236+
"engine-version": EnvironmentContext.get().get("engine-version"),
234237
}
235238

236239
# Append
@@ -244,6 +247,8 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
244247
"total-files-size": str(file_size * 2),
245248
"total-position-deletes": "0",
246249
"total-records": "6",
250+
"engine-name": "pyiceberg",
251+
"engine-version": EnvironmentContext.get().get("engine-version"),
247252
}
248253

249254
# Delete
@@ -257,6 +262,8 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
257262
"total-files-size": "0",
258263
"total-position-deletes": "0",
259264
"total-records": "0",
265+
"engine-name": "pyiceberg",
266+
"engine-version": EnvironmentContext.get().get("engine-version"),
260267
}
261268

262269
# Append
@@ -270,6 +277,8 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
270277
"total-files-size": str(file_size),
271278
"total-position-deletes": "0",
272279
"total-records": "3",
280+
"engine-name": "pyiceberg",
281+
"engine-version": EnvironmentContext.get().get("engine-version"),
273282
}
274283

275284

@@ -326,6 +335,8 @@ def test_summaries_partial_overwrite(spark: SparkSession, session_catalog: Catal
326335
"total-files-size": summaries[0]["total-files-size"],
327336
"total-position-deletes": "0",
328337
"total-records": "5",
338+
"engine-name": "pyiceberg",
339+
"engine-version": EnvironmentContext.get().get("engine-version"),
329340
}
330341
# Java produces:
331342
# {
@@ -367,6 +378,8 @@ def test_summaries_partial_overwrite(spark: SparkSession, session_catalog: Catal
367378
"total-files-size": summaries[1]["total-files-size"],
368379
"total-position-deletes": "0",
369380
"total-records": "4",
381+
"engine-name": "pyiceberg",
382+
"engine-version": EnvironmentContext.get().get("engine-version"),
370383
}
371384
assert len(tbl.scan().to_pandas()) == 4
372385

@@ -831,6 +844,8 @@ def test_summaries_with_only_nulls(
831844
"total-files-size": "0",
832845
"total-position-deletes": "0",
833846
"total-records": "0",
847+
"engine-name": "pyiceberg",
848+
"engine-version": EnvironmentContext.get().get("engine-version"),
834849
}
835850

836851
assert summaries[1] == {
@@ -843,6 +858,8 @@ def test_summaries_with_only_nulls(
843858
"total-files-size": str(file_size),
844859
"total-position-deletes": "0",
845860
"total-records": "2",
861+
"engine-name": "pyiceberg",
862+
"engine-version": EnvironmentContext.get().get("engine-version"),
846863
}
847864

848865
assert summaries[2] == {
@@ -855,6 +872,8 @@ def test_summaries_with_only_nulls(
855872
"total-files-size": "0",
856873
"total-position-deletes": "0",
857874
"total-records": "0",
875+
"engine-name": "pyiceberg",
876+
"engine-version": EnvironmentContext.get().get("engine-version"),
858877
}
859878

860879
assert summaries[3] == {
@@ -864,6 +883,8 @@ def test_summaries_with_only_nulls(
864883
"total-files-size": "0",
865884
"total-position-deletes": "0",
866885
"total-records": "0",
886+
"engine-name": "pyiceberg",
887+
"engine-version": EnvironmentContext.get().get("engine-version"),
867888
}
868889

869890

@@ -1156,6 +1177,8 @@ def test_inspect_snapshots(
11561177
("total-files-size", str(file_size)),
11571178
("total-position-deletes", "0"),
11581179
("total-equality-deletes", "0"),
1180+
("engine-name", "pyiceberg"),
1181+
("engine-version", EnvironmentContext.get().get("engine-version")),
11591182
]
11601183

11611184
# Delete
@@ -1169,6 +1192,8 @@ def test_inspect_snapshots(
11691192
("total-files-size", "0"),
11701193
("total-position-deletes", "0"),
11711194
("total-equality-deletes", "0"),
1195+
("engine-name", "pyiceberg"),
1196+
("engine-version", EnvironmentContext.get().get("engine-version")),
11721197
]
11731198

11741199
lhs = spark.table(f"{identifier}.snapshots").toPandas()

tests/table/test_snapshots.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import pytest
2121

22+
from pyiceberg.environment_context import EnvironmentContext
2223
from pyiceberg.manifest import DataFile, DataFileContent, ManifestContent, ManifestFile
2324
from pyiceberg.partitioning import PartitionField, PartitionSpec
2425
from pyiceberg.schema import Schema
@@ -315,6 +316,8 @@ def test_merge_snapshot_summaries_empty() -> None:
315316
"total-files-size": "0",
316317
"total-position-deletes": "0",
317318
"total-equality-deletes": "0",
319+
"engine-name": "pyiceberg",
320+
"engine-version": EnvironmentContext.get().get("engine-version"),
318321
},
319322
)
320323

@@ -349,6 +352,8 @@ def test_merge_snapshot_summaries_new_summary() -> None:
349352
"total-files-size": "4",
350353
"total-position-deletes": "5",
351354
"total-equality-deletes": "3",
355+
"engine-name": "pyiceberg",
356+
"engine-version": EnvironmentContext.get().get("engine-version"),
352357
},
353358
)
354359

@@ -391,6 +396,8 @@ def test_merge_snapshot_summaries_overwrite_summary() -> None:
391396
"total-files-size": "5",
392397
"total-position-deletes": "6",
393398
"total-equality-deletes": "4",
399+
"engine-name": "pyiceberg",
400+
"engine-version": EnvironmentContext.get().get("engine-version"),
394401
}
395402

396403
assert actual.additional_properties == expected

tests/test_environment_context.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
import re
18+
19+
from pyiceberg.environment_context import EnvironmentContext
20+
21+
22+
def test_default_value() -> None:
23+
actual = EnvironmentContext.get()
24+
assert len(actual) == 2
25+
assert actual["engine-name"] == "pyiceberg"
26+
assert re.match(r"^\d+\.\d+\.\d+", actual["engine-version"])
27+
28+
29+
def test_put_and_remove() -> None:
30+
EnvironmentContext.put("test-key", "test-value")
31+
assert EnvironmentContext.get()["test-key"] == "test-value"
32+
33+
EnvironmentContext.remove("test-key")
34+
assert "test-key" not in EnvironmentContext.get()

0 commit comments

Comments
 (0)