@@ -26,13 +26,13 @@ Set retention time for optimized files (ready to delete:
26
26
spark.conf.set("spark.databricks.delta.deletedFileRetentionDuration","0")
27
27
28
28
29
- Check existing table details (look for numFiles:
29
+ Check existing table details (look for numFiles and sizeInBytes):
30
30
spark.sql("describe detail atm").show(truncate=False)
31
- +------+------------------------------------+-------------------------+ -----------+ -------------------------------------+ ----------------------- +-------------------+ ------------------ +--------+ -----------+-----------------------------------------------+----------------+----------------+------------------------+
32
- |format|id |name |description|location |createdAt |lastModified |partitionColumns |numFiles|sizeInBytes|properties |minReaderVersion|minWriterVersion|tableFeatures |
33
- +------+------------------------------------+-------------------------+ -----------+ -------------------------------------+ ----------------------- +-------------------+ ------------------ +--------+ -----------+-----------------------------------------------+----------------+----------------+------------------------+
34
- |delta |81336019-7998-4b1d-b4da-1b7ca9d5c745 |spark_catalog.default.atm |NULL |oci://atm_data @fro8fl9kuqli/atm_delta |2024-07-15 12:57:15.812 |2024-08-21 07:57:05 |[year, month, day]|1822 |286807670 |{delta.deletedFileRetentionDuration -> 0 hours} |1 |2 |[appendOnly, invariants]|
35
- +------+------------------------------------+-------------------------+ -----------+ -------------------------------------+ ----------------------- +-------------------+ ------------------ +--------+ -----------+-----------------------------------------------+----------------+----------------+------------------------+
31
+ +------+------------------------------------+--------------------------------+-----------+----------------------------------------------------+-----------------------+-------------------+------------------+--------+-----------+----------+----------------+----------------+------------------------+
32
+ |format|id |name |description|location |createdAt |lastModified |partitionColumns |numFiles|sizeInBytes|properties |minReaderVersion|minWriterVersion|tableFeatures |
33
+ +------+------------------------------------+--------------------------------+-----------+----------------------------------------------------+-----------------------+-------------------+------------------+--------+-----------+----------+----------------+----------------+------------------------+
34
+ |delta |c15ad4ca-8c0f-4747-b064-1492d7b4b3c4|spark_catalog.default.hsl_trains|NULL |oci://dataflow_app@fro8fl9kuqli/hsl_trains_data_part|2024-09-05 10:19:10.057|2024-09-06 08:45:01|[year, month, day]|2024 |16333676 |{} |1 |2 |[appendOnly, invariants]|
35
+ +------+------------------------------------+--------------------------------+-----------+----------------------------------------------------+-----------------------+-------------------+------------------+--------+-----------+----------+----------------+----------------+------------------------+
36
36
37
37
Run optimization:
38
38
spark.sql("OPTIMIZE atm").show(truncate=False)
@@ -45,14 +45,13 @@ spark.sql("vacuum atm RETAIN 0 HOURS")
45
45
46
46
and check details of your table:
47
47
spark.sql("describe detail atm").show(truncate=False)
48
- +------+------------------------------------+-------------------------+-----------+-------------------------------------+-----------------------+-------------------+------------------+--------+-----------+-----------------------------------------------+----------------+----------------+------------------------+
49
- |format|id |name |description|location |createdAt |lastModified |partitionColumns |numFiles|sizeInBytes|properties |minReaderVersion|minWriterVersion|tableFeatures |
50
- +------+------------------------------------+-------------------------+-----------+-------------------------------------+-----------------------+-------------------+------------------+--------+-----------+-----------------------------------------------+----------------+----------------+------------------------+
51
- |delta |81336019-7998-4b1d-b4da-1b7ca9d5c745|spark_catalog.default.atm|NULL |oci://atm_data@fro8fl9kuqli/atm_delta|2024-07-15 12:57:15.812|2024-09-06 08:26:45|[year, month, day]|21 |286807670 |{delta.deletedFileRetentionDuration -> 0 hours}|1 |2 |[appendOnly, invariants]|
52
- +------+------------------------------------+-------------------------+-----------+-------------------------------------+-----------------------+-------------------+------------------+--------+-----------+-----------------------------------------------+----------------+----------------+------------------------+
53
-
48
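+ +------+------------------------------------+--------------------------------+-----------+----------------------------------------------------+-----------------------+-------------------+------------------+--------+-----------+----------+----------------+----------------+------------------------+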
49
+ |format|id |name |description|location |createdAt |lastModified |partitionColumns |numFiles|sizeInBytes|properties|minReaderVersion|minWriterVersion|tableFeatures |
50
+ +------+------------------------------------+--------------------------------+-----------+----------------------------------------------------+-----------------------+-------------------+------------------+--------+-----------+----------+----------------+----------------+------------------------+
51
+ |delta |c15ad4ca-8c0f-4747-b064-1492d7b4b3c4|spark_catalog.default.hsl_trains|NULL |oci://dataflow_app@fro8fl9kuqli/hsl_trains_data_part|2024-09-05 10:19:10.057|2024-09-06 08:47:48|[year, month, day]|7 |1583521 |{} |1 |2 |[appendOnly, invariants]|
52
+ +------+------------------------------------+--------------------------------+-----------+----------------------------------------------------+-----------------------+-------------------+------------------+--------+-----------+----------+----------------+----------------+------------------------+
54
53
55
-
54
+ Enjoy increased performance of your queries!
56
55
57
56
58
57
0 commit comments