Review notes (free text ahead of the first "diff --git" line):
- Describe.getTableMetrics calls tasks.close() only after the loop over planFiles() has finished,
  so an exception thrown while iterating leaves the CloseableIterable open. Follow-up: manage it
  with try-with-resources.
- With --include-metrics removed, Describe.run calls getTableMetrics for every matched table.
- Output is now one serialized list of tables (YAML or JSON) instead of one "---" document per
  table, and the JSON form is no longer one object per line.

diff --git a/ice/src/main/java/com/altinity/ice/Main.java b/ice/src/main/java/com/altinity/ice/Main.java
index a26ed39f..a45eb5a8 100644
--- a/ice/src/main/java/com/altinity/ice/Main.java
+++ b/ice/src/main/java/com/altinity/ice/Main.java
@@ -69,14 +69,10 @@ void describe(
       @CommandLine.Option(
               names = {"--json"},
               description = "Output JSON instead of YAML")
-          boolean json,
-      @CommandLine.Option(
-              names = {"--include-metrics"},
-              description = "Include table metrics in the output")
-          boolean includeMetrics)
+          boolean json)
       throws IOException {
     try (RESTCatalog catalog = loadCatalog(this.configFile)) {
-      Describe.run(catalog, target, json, includeMetrics);
+      Describe.run(catalog, target, json);
     }
   }
 
diff --git a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java
index 7f390cb7..26bb853b 100644
--- a/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java
+++ b/ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java
@@ -1,7 +1,7 @@
 package com.altinity.ice.internal.cmd;
 
-import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.databind.JsonNode;
+import com.altinity.ice.internal.model.TableMetadata;
+import com.altinity.ice.internal.model.TableMetadata.*;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
 import java.io.IOException;
@@ -9,9 +9,9 @@
 import java.time.Instant;
 import java.time.ZoneId;
 import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
-import java.util.stream.Collectors;
 import org.apache.iceberg.*;
 import org.apache.iceberg.catalog.Namespace;
 import org.apache.iceberg.catalog.TableIdentifier;
@@ -24,13 +24,10 @@ public final class Describe {
 
   private Describe() {}
 
-  // TODO: refactor: the use of StringBuilder below is absolutely criminal
-  public static void run(RESTCatalog catalog, String target, boolean json, boolean includeMetrics)
-      throws IOException {
+  public static void run(RESTCatalog catalog, String target, boolean json) throws IOException {
     String targetNamespace = null;
     String targetTable = null;
     if (target != null && !target.isEmpty()) {
-      // TODO: support catalog.ns.table
       var s = target.split("[.]", 2);
       switch (s.length) {
         case 2:
@@ -42,8 +39,8 @@ public static void run(RESTCatalog catalog, String target, boolean json, boolean
           break;
       }
     }
-    // FIXME: there is no need to list nss/tables when target is given
-    var sb = new StringBuilder();
+
+    List<TableMetadata> tablesMetadata = new ArrayList<>();
     List<Namespace> namespaces = catalog.listNamespaces();
     for (Namespace namespace : namespaces) {
       if (targetNamespace != null && !targetNamespace.equals(namespace.toString())) {
@@ -54,73 +51,55 @@ public static void run(RESTCatalog catalog, String target, boolean json, boolean
         if (targetTable != null && !targetTable.equals(tableId.name())) {
           continue;
         }
-        sb.append("---\n");
-        sb.append("kind: Table\n");
-        sb.append("metadata:\n");
-        sb.append("\tid: " + tableId + "\n");
         Table table = catalog.loadTable(tableId);
-        sb.append("data:\n");
-        sb.append("\tschema_raw: |-\n" + prefixEachLine(table.schema().toString(), "\t\t") + "\n");
-        sb.append(
-            "\tpartition_spec_raw: |-\n" + prefixEachLine(table.spec().toString(), "\t\t") + "\n");
-        sb.append(
-            "\tsort_order_raw: |-\n" + prefixEachLine(table.sortOrder().toString(), "\t\t") + "\n");
-        sb.append("\tproperties: \n");
-        for (var property : table.properties().entrySet()) {
-          var v = property.getValue();
-          if (v.contains("\n")) {
-            sb.append("\t\t" + property.getKey() + ": |-\n" + prefixEachLine(v, "\t\t\t") + "\n");
-          } else {
-            sb.append("\t\t" + property.getKey() + ": \"" + v + "\"\n");
-          }
-        }
-        sb.append("\tlocation: " + table.location() + "\n");
-        sb.append("\tcurrent_snapshot: \n");
         Snapshot snapshot = table.currentSnapshot();
+
+        SnapshotInfo snapshotInfo = null;
         if (snapshot != null) {
-          sb.append("\t\tsequence_number: " + snapshot.sequenceNumber() + "\n");
-          sb.append("\t\tid: " + snapshot.snapshotId() + "\n");
-          sb.append("\t\tparent_id: " + snapshot.parentId() + "\n");
-          sb.append("\t\ttimestamp: " + snapshot.timestampMillis() + "\n");
-          sb.append(
-              "\t\ttimestamp_iso: \""
-                  + Instant.ofEpochMilli(snapshot.timestampMillis()).toString()
-                  + "\"\n");
-          sb.append(
-              "\t\ttimestamp_iso_local: \""
-                  + Instant.ofEpochMilli(snapshot.timestampMillis())
+          snapshotInfo =
+              new SnapshotInfo(
+                  snapshot.sequenceNumber(),
+                  snapshot.snapshotId(),
+                  snapshot.parentId(),
+                  snapshot.timestampMillis(),
+                  Instant.ofEpochMilli(snapshot.timestampMillis()).toString(),
+                  Instant.ofEpochMilli(snapshot.timestampMillis())
                       .atZone(ZoneId.systemDefault())
-                      .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)
-                  + "\"\n");
-          sb.append("\t\toperation: " + snapshot.operation() + "\n");
-          sb.append("\t\tsummary:\n");
-          for (var property : snapshot.summary().entrySet()) {
-            sb.append("\t\t\t" + property.getKey() + ": \"" + property.getValue() + "\"\n");
-          }
-          sb.append("\t\tlocation: " + snapshot.manifestListLocation() + "\n");
+                      .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME),
+                  snapshot.operation(),
+                  snapshot.summary(),
+                  snapshot.manifestListLocation());
         }
 
-        if (includeMetrics) {
-          printTableMetrics(table, sb);
-        }
+        List<MetricsInfo> metrics = getTableMetrics(table);
+
+        TableData tableData =
+            new TableData(
+                table.schema().toString(),
+                table.spec().toString(),
+                table.sortOrder().toString(),
+                table.properties(),
+                table.location(),
+                snapshotInfo,
+                metrics);
+
+        tablesMetadata.add(new TableMetadata("Table", new Metadata(tableId.toString()), tableData));
       }
     }
-    String r = sb.toString().replace("\t", " ");
-    if (json) {
-      r = convertYamlToJson(r);
-    }
-    System.out.println(r);
+
+    ObjectMapper mapper = json ? new ObjectMapper() : new ObjectMapper(new YAMLFactory());
+    String output = mapper.writeValueAsString(tablesMetadata);
+    System.out.println(output);
   }
 
-  private static void printTableMetrics(Table table, StringBuilder buffer) throws IOException {
+  private static List<MetricsInfo> getTableMetrics(Table table) throws IOException {
+    List<MetricsInfo> metricsList = new ArrayList<>();
     TableScan scan = table.newScan().includeColumnStats();
     CloseableIterable<FileScanTask> tasks = scan.planFiles();
 
     for (FileScanTask task : tasks) {
       DataFile dataFile = task.file();
-      buffer.append("\tmetrics:\n");
-      buffer.append("\t\tfile: " + dataFile.path() + "\n");
-      buffer.append("\t\trecord_count: " + dataFile.recordCount() + "\n");
+      List<ColumnMetrics> columnMetrics = new ArrayList<>();
 
       Map<Integer, Long> valueCounts = dataFile.valueCounts();
       Map<Integer, Long> nullCounts = dataFile.nullValueCounts();
@@ -131,52 +110,36 @@ private static void printTableMetrics(Table table, StringBuilder buffer) throws
         continue;
       }
 
-      buffer.append("\t\tcolumns:\n");
       for (Types.NestedField field : table.schema().columns()) {
         int id = field.fieldId();
-        buffer.append("\t\t\t" + field.name() + ":\n");
-        if (valueCounts != null) {
-          buffer.append("\t\t\t\tvalue_count: " + valueCounts.get(id) + "\n");
-        }
-        if (nullCounts != null) {
-          buffer.append("\t\t\t\tnull_count: " + nullCounts.get(id) + "\n");
-        }
+        String lowerBound = null;
+        String upperBound = null;
+
         if (lowerBounds != null) {
           ByteBuffer lower = lowerBounds.get(id);
-          String lowerStr =
-              lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : "null";
-          buffer.append("\t\t\t\tlower_bound: " + lowerStr + "\n");
+          lowerBound =
+              lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : null;
         }
         if (upperBounds != null) {
           ByteBuffer upper = upperBounds.get(id);
-          String upperStr =
-              upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : "null";
-          buffer.append("\t\t\t\tupper_bound: " + upperStr + "\n");
+          upperBound =
+              upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : null;
         }
-      }
-    }
-
-    tasks.close();
-  }
 
-  private static String convertYamlToJson(String yaml) throws IOException {
-    YAMLFactory yamlFactory = new YAMLFactory();
-    ObjectMapper yamlReader = new ObjectMapper(yamlFactory);
-    ObjectMapper jsonWriter = new ObjectMapper();
-    StringBuilder result = new StringBuilder();
-    try (JsonParser parser = yamlFactory.createParser(yaml)) {
-      while (!parser.isClosed()) {
-        JsonNode node = yamlReader.readTree(parser);
-        if (node != null) {
-          String json = jsonWriter.writeValueAsString(node);
-          result.append(json).append("\n");
-        }
+        columnMetrics.add(
+            new ColumnMetrics(
+                field.name(),
+                valueCounts != null ? valueCounts.get(id) : null,
+                nullCounts != null ? nullCounts.get(id) : null,
+                lowerBound,
+                upperBound));
       }
+
+      metricsList.add(
+          new MetricsInfo(dataFile.path().toString(), dataFile.recordCount(), columnMetrics));
     }
-    return result.toString().trim();
-  }
 
-  private static String prefixEachLine(String v, String prefix) {
-    return v.lines().map(line -> prefix + line).collect(Collectors.joining("\n"));
+    tasks.close();
+    return metricsList;
   }
 }
diff --git a/ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java b/ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java
new file mode 100644
index 00000000..1cfcba34
--- /dev/null
+++ b/ice/src/main/java/com/altinity/ice/internal/model/TableMetadata.java
@@ -0,0 +1,41 @@
+package com.altinity.ice.internal.model;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import java.util.List;
+import java.util.Map;
+
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public record TableMetadata(String kind, Metadata metadata, TableData data) {
+  public TableMetadata {
+    if (kind == null) {
+      kind = "Table";
+    }
+  }
+
+  public record Metadata(String id) {}
+
+  public record TableData(
+      String schema_raw,
+      String partition_spec_raw,
+      String sort_order_raw,
+      Map<String, String> properties,
+      String location,
+      SnapshotInfo current_snapshot,
+      List<MetricsInfo> metrics) {}
+
+  public record SnapshotInfo(
+      long sequence_number,
+      long id,
+      Long parent_id,
+      long timestamp,
+      String timestamp_iso,
+      String timestamp_iso_local,
+      String operation,
+      Map<String, String> summary,
+      String location) {}
+
+  public record MetricsInfo(String file, long record_count, List<ColumnMetrics> columns) {}
+
+  public record ColumnMetrics(
+      String name, Long value_count, Long null_count, String lower_bound, String upper_bound) {}
+}