Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions ice/src/main/java/com/altinity/ice/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,10 @@ void describe(
@CommandLine.Option(
names = {"--json"},
description = "Output JSON instead of YAML")
boolean json,
@CommandLine.Option(
names = {"--include-metrics"},
description = "Include table metrics in the output")
boolean includeMetrics)
boolean json)
throws IOException {
try (RESTCatalog catalog = loadCatalog(this.configFile)) {
Describe.run(catalog, target, json, includeMetrics);
Describe.run(catalog, target, json);
}
}

Expand Down
155 changes: 59 additions & 96 deletions ice/src/main/java/com/altinity/ice/internal/cmd/Describe.java
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
package com.altinity.ice.internal.cmd;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.JsonNode;
import com.altinity.ice.internal.model.TableMetadata;
import com.altinity.ice.internal.model.TableMetadata.*;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.iceberg.*;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
Expand All @@ -24,13 +24,10 @@ public final class Describe {

private Describe() {}

// TODO: refactor: the use of StringBuilder below is absolutely criminal
public static void run(RESTCatalog catalog, String target, boolean json, boolean includeMetrics)
throws IOException {
public static void run(RESTCatalog catalog, String target, boolean json) throws IOException {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like TableMetadata, etc. records are defined but not actually used...

String targetNamespace = null;
String targetTable = null;
if (target != null && !target.isEmpty()) {
// TODO: support catalog.ns.table
var s = target.split("[.]", 2);
switch (s.length) {
case 2:
Expand All @@ -42,8 +39,8 @@ public static void run(RESTCatalog catalog, String target, boolean json, boolean
break;
}
}
// FIXME: there is no need to list nss/tables when target is given
var sb = new StringBuilder();

List<TableMetadata> tablesMetadata = new ArrayList<>();
List<Namespace> namespaces = catalog.listNamespaces();
for (Namespace namespace : namespaces) {
if (targetNamespace != null && !targetNamespace.equals(namespace.toString())) {
Expand All @@ -54,73 +51,55 @@ public static void run(RESTCatalog catalog, String target, boolean json, boolean
if (targetTable != null && !targetTable.equals(tableId.name())) {
continue;
}
sb.append("---\n");
sb.append("kind: Table\n");
sb.append("metadata:\n");
sb.append("\tid: " + tableId + "\n");
Table table = catalog.loadTable(tableId);
sb.append("data:\n");
sb.append("\tschema_raw: |-\n" + prefixEachLine(table.schema().toString(), "\t\t") + "\n");
sb.append(
"\tpartition_spec_raw: |-\n" + prefixEachLine(table.spec().toString(), "\t\t") + "\n");
sb.append(
"\tsort_order_raw: |-\n" + prefixEachLine(table.sortOrder().toString(), "\t\t") + "\n");
sb.append("\tproperties: \n");
for (var property : table.properties().entrySet()) {
var v = property.getValue();
if (v.contains("\n")) {
sb.append("\t\t" + property.getKey() + ": |-\n" + prefixEachLine(v, "\t\t\t") + "\n");
} else {
sb.append("\t\t" + property.getKey() + ": \"" + v + "\"\n");
}
}
sb.append("\tlocation: " + table.location() + "\n");
sb.append("\tcurrent_snapshot: \n");
Snapshot snapshot = table.currentSnapshot();

SnapshotInfo snapshotInfo = null;
if (snapshot != null) {
sb.append("\t\tsequence_number: " + snapshot.sequenceNumber() + "\n");
sb.append("\t\tid: " + snapshot.snapshotId() + "\n");
sb.append("\t\tparent_id: " + snapshot.parentId() + "\n");
sb.append("\t\ttimestamp: " + snapshot.timestampMillis() + "\n");
sb.append(
"\t\ttimestamp_iso: \""
+ Instant.ofEpochMilli(snapshot.timestampMillis()).toString()
+ "\"\n");
sb.append(
"\t\ttimestamp_iso_local: \""
+ Instant.ofEpochMilli(snapshot.timestampMillis())
snapshotInfo =
new SnapshotInfo(
snapshot.sequenceNumber(),
snapshot.snapshotId(),
snapshot.parentId(),
snapshot.timestampMillis(),
Instant.ofEpochMilli(snapshot.timestampMillis()).toString(),
Instant.ofEpochMilli(snapshot.timestampMillis())
.atZone(ZoneId.systemDefault())
.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)
+ "\"\n");
sb.append("\t\toperation: " + snapshot.operation() + "\n");
sb.append("\t\tsummary:\n");
for (var property : snapshot.summary().entrySet()) {
sb.append("\t\t\t" + property.getKey() + ": \"" + property.getValue() + "\"\n");
}
sb.append("\t\tlocation: " + snapshot.manifestListLocation() + "\n");
.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME),
snapshot.operation(),
snapshot.summary(),
snapshot.manifestListLocation());
}

if (includeMetrics) {
printTableMetrics(table, sb);
}
List<MetricsInfo> metrics = getTableMetrics(table);

TableData tableData =
new TableData(
table.schema().toString(),
table.spec().toString(),
table.sortOrder().toString(),
table.properties(),
table.location(),
snapshotInfo,
metrics);

tablesMetadata.add(new TableMetadata("Table", new Metadata(tableId.toString()), tableData));
}
}
String r = sb.toString().replace("\t", " ");
if (json) {
r = convertYamlToJson(r);
}
System.out.println(r);

ObjectMapper mapper = json ? new ObjectMapper() : new ObjectMapper(new YAMLFactory());
String output = mapper.writeValueAsString(tablesMetadata);
System.out.println(output);
}

private static void printTableMetrics(Table table, StringBuilder buffer) throws IOException {
private static List<MetricsInfo> getTableMetrics(Table table) throws IOException {
List<MetricsInfo> metricsList = new ArrayList<>();
TableScan scan = table.newScan().includeColumnStats();
CloseableIterable<FileScanTask> tasks = scan.planFiles();

for (FileScanTask task : tasks) {
DataFile dataFile = task.file();
buffer.append("\tmetrics:\n");
buffer.append("\t\tfile: " + dataFile.path() + "\n");
buffer.append("\t\trecord_count: " + dataFile.recordCount() + "\n");
List<ColumnMetrics> columnMetrics = new ArrayList<>();

Map<Integer, Long> valueCounts = dataFile.valueCounts();
Map<Integer, Long> nullCounts = dataFile.nullValueCounts();
Expand All @@ -131,52 +110,36 @@ private static void printTableMetrics(Table table, StringBuilder buffer) throws
continue;
}

buffer.append("\t\tcolumns:\n");
for (Types.NestedField field : table.schema().columns()) {
int id = field.fieldId();
buffer.append("\t\t\t" + field.name() + ":\n");
if (valueCounts != null) {
buffer.append("\t\t\t\tvalue_count: " + valueCounts.get(id) + "\n");
}
if (nullCounts != null) {
buffer.append("\t\t\t\tnull_count: " + nullCounts.get(id) + "\n");
}
String lowerBound = null;
String upperBound = null;

if (lowerBounds != null) {
ByteBuffer lower = lowerBounds.get(id);
String lowerStr =
lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : "null";
buffer.append("\t\t\t\tlower_bound: " + lowerStr + "\n");
lowerBound =
lower != null ? Conversions.fromByteBuffer(field.type(), lower).toString() : null;
}
if (upperBounds != null) {
ByteBuffer upper = upperBounds.get(id);
String upperStr =
upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : "null";
buffer.append("\t\t\t\tupper_bound: " + upperStr + "\n");
upperBound =
upper != null ? Conversions.fromByteBuffer(field.type(), upper).toString() : null;
}
}
}

tasks.close();
}

private static String convertYamlToJson(String yaml) throws IOException {
YAMLFactory yamlFactory = new YAMLFactory();
ObjectMapper yamlReader = new ObjectMapper(yamlFactory);
ObjectMapper jsonWriter = new ObjectMapper();
StringBuilder result = new StringBuilder();
try (JsonParser parser = yamlFactory.createParser(yaml)) {
while (!parser.isClosed()) {
JsonNode node = yamlReader.readTree(parser);
if (node != null) {
String json = jsonWriter.writeValueAsString(node);
result.append(json).append("\n");
}
columnMetrics.add(
new ColumnMetrics(
field.name(),
valueCounts != null ? valueCounts.get(id) : null,
nullCounts != null ? nullCounts.get(id) : null,
lowerBound,
upperBound));
}

metricsList.add(
new MetricsInfo(dataFile.path().toString(), dataFile.recordCount(), columnMetrics));
}
return result.toString().trim();
}

private static String prefixEachLine(String v, String prefix) {
return v.lines().map(line -> prefix + line).collect(Collectors.joining("\n"));
tasks.close();
return metricsList;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.altinity.ice.internal.model;

import com.fasterxml.jackson.annotation.JsonInclude;
import java.util.List;
import java.util.Map;

/**
 * Serialization model for {@code ice describe} output, rendered as YAML or JSON via Jackson.
 *
 * <p>Component names deliberately use snake_case (e.g. {@code schema_raw}) because record
 * component names become the serialized property names; do not rename them to camelCase
 * without adding explicit {@code @JsonProperty} mappings.
 *
 * <p>{@code NON_NULL} is applied to every record that has nullable components so that absent
 * data (no current snapshot, missing column bounds, etc.) is omitted from the output instead
 * of being emitted as explicit nulls. Note that {@code @JsonInclude} on an outer record does
 * not cascade to nested records — each one needs its own annotation.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record TableMetadata(String kind, Metadata metadata, TableData data) {
  public TableMetadata {
    // Default the discriminator so callers may pass null without producing "kind: null".
    if (kind == null) {
      kind = "Table";
    }
  }

  /** Identifying metadata; {@code id} is the fully-qualified table identifier (ns.table). */
  public record Metadata(String id) {}

  /**
   * Table-level details. {@code current_snapshot} is null for tables that have no snapshot
   * yet; {@code metrics} is populated only when metrics collection ran.
   */
  @JsonInclude(JsonInclude.Include.NON_NULL)
  public record TableData(
      String schema_raw,
      String partition_spec_raw,
      String sort_order_raw,
      Map<String, String> properties,
      String location,
      SnapshotInfo current_snapshot,
      List<MetricsInfo> metrics) {}

  /**
   * Current-snapshot details. {@code parent_id} is null for the first snapshot of a table;
   * {@code timestamp} is epoch millis, with ISO renderings in UTC ({@code timestamp_iso})
   * and the local zone ({@code timestamp_iso_local}).
   */
  @JsonInclude(JsonInclude.Include.NON_NULL)
  public record SnapshotInfo(
      long sequence_number,
      long id,
      Long parent_id,
      long timestamp,
      String timestamp_iso,
      String timestamp_iso_local,
      String operation,
      Map<String, String> summary,
      String location) {}

  /** Per-data-file metrics: file path, row count, and per-column statistics. */
  public record MetricsInfo(String file, long record_count, List<ColumnMetrics> columns) {}

  /**
   * Per-column statistics for one data file. Counts and bounds are null when the file's
   * metadata does not carry the corresponding statistic.
   */
  @JsonInclude(JsonInclude.Include.NON_NULL)
  public record ColumnMetrics(
      String name, Long value_count, Long null_count, String lower_bound, String upper_bound) {}
}