Skip to content

Commit 3f7c0d0

Browse files
committed
fix: cleanups
1 parent ff500a3 commit 3f7c0d0

File tree

2 files changed

+64
-199
lines changed

2 files changed

+64
-199
lines changed

src/adbc_catalog.cpp

Lines changed: 10 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -866,140 +866,8 @@ struct AdbcSchemaGlobalState : public GlobalTableFunctionState {
866866
}
867867
};
868868

869-
// Maps an Arrow time-unit character ('s', 'm', 'u', 'n') to its bracketed suffix.
// Returns nullptr for any other character, including the NUL terminator.
static const char *ArrowTimeUnitSuffix(char unit) {
	switch (unit) {
	case 's':
		return "[s]";
	case 'm':
		return "[ms]";
	case 'u':
		return "[us]";
	case 'n':
		return "[ns]";
	default:
		return nullptr;
	}
}

// Names a temporal format string (one whose first character is 't').
static string ArrowTemporalFormatToTypeName(const char *format) {
	// format[0] is 't', so format[1] is at worst the terminator. Whenever format[1] is a
	// real character, format[2] is also readable (again, at worst the terminator), which
	// never matches any unit below - no strlen() calls are needed.
	if (format[1] == '\0') {
		return "temporal";
	}
	const char unit = format[2];
	switch (format[1]) {
	case 'd':
		// Date: tdD (days) or tdm (milliseconds)
		if (unit == 'D') {
			return "date32";
		}
		if (unit == 'm') {
			return "date64";
		}
		return "date";
	case 't':
		// Time: tt[smun] (seconds/millis are 32-bit, micros/nanos are 64-bit)
		switch (unit) {
		case 's':
			return "time32[s]";
		case 'm':
			return "time32[ms]";
		case 'u':
			return "time64[us]";
		case 'n':
			return "time64[ns]";
		default:
			return "time";
		}
	case 's': {
		// Timestamp: ts[smun]:timezone
		string result = "timestamp";
		if (const char *suffix = ArrowTimeUnitSuffix(unit)) {
			result += suffix;
		}
		// Include the timezone only when something follows the ':' separator
		const char *tz = strchr(format, ':');
		if (tz && tz[1] != '\0') {
			result += " tz=" + string(tz + 1);
		}
		return result;
	}
	case 'D': {
		// Duration: tD[smun]
		string result = "duration";
		if (const char *suffix = ArrowTimeUnitSuffix(unit)) {
			result += suffix;
		}
		return result;
	}
	case 'i':
		// Interval: tiM (months), tiD (days/time), tin (month/day/nano)
		switch (unit) {
		case 'M':
			return "interval[months]";
		case 'D':
			return "interval[days]";
		case 'n':
			return "interval[month_day_nano]";
		default:
			return "interval";
		}
	default:
		return "temporal";
	}
}

// Names a nested format string (one whose first character is '+').
static string ArrowNestedFormatToTypeName(const char *format) {
	if (format[1] == '\0') {
		return "nested";
	}
	// format[1] is a real character here, so format[2] is readable (at worst the terminator)
	const char detail = format[2];
	switch (format[1]) {
	case 'l':
		return "list";
	case 'L':
		return "large_list";
	case 'w':
		// Fixed-size list: +w:size - keep the ":size" part
		return "fixed_list" + string(format + 2);
	case 's':
		return "struct";
	case 'm':
		return "map";
	case 'u':
		// Union: +ud:type_ids (dense) or +us:type_ids (sparse)
		if (detail == 'd') {
			return "dense_union";
		}
		if (detail == 's') {
			return "sparse_union";
		}
		return "union";
	case 'r':
		return "run_end_encoded";
	case 'v':
		// List views: +vl (list view) or +vL (large list view)
		return detail == 'L' ? "large_list_view" : "list_view";
	default:
		return "nested";
	}
}

// Helper to convert an Arrow C Data Interface format string to a human-readable type name.
//
// format: NUL-terminated Arrow format string (e.g. "i", "d:19,10", "tsu:UTC", "+l"); may be null.
// Returns "unknown" for a null pointer, a descriptive name for every recognised format, and the
// format string itself for anything unrecognised.
// Parameterised formats keep their parameter text verbatim (e.g. "d:19,10" -> "decimal:19,10").
static string ArrowFormatToTypeName(const char *format) {
	if (!format) {
		return "unknown";
	}

	// Only the leading character(s) are inspected - see the Arrow C Data Interface spec
	switch (format[0]) {
	case 'n':
		return "null";
	case 'b':
		return "boolean";
	case 'c':
		return "int8";
	case 'C':
		return "uint8";
	case 's':
		return "int16";
	case 'S':
		return "uint16";
	case 'i':
		return "int32";
	case 'I':
		return "uint32";
	case 'l':
		return "int64";
	case 'L':
		return "uint64";
	case 'e':
		return "float16";
	case 'f':
		return "float32";
	case 'g':
		return "float64";
	case 'z':
		return "binary";
	case 'Z':
		return "large_binary";
	case 'u':
		return "utf8";
	case 'U':
		return "large_utf8";
	case 'v':
		// View types: vz (binary view) and vu (utf-8 view)
		if (format[1] == 'z') {
			return "binary_view";
		}
		if (format[1] == 'u') {
			return "utf8_view";
		}
		return string(format);
	case 'd':
		// Decimal: d:precision,scale or d:precision,scale,bitwidth
		return "decimal" + string(format + 1);
	case 'w':
		// Fixed-width binary: w:bytewidth
		return "fixed_binary" + string(format + 1);
	case 't':
		return ArrowTemporalFormatToTypeName(format);
	case '+':
		return ArrowNestedFormatToTypeName(format);
	default:
		// Return the format string directly for unknown types
		return string(format);
	}
}
1000-
1001-
// Helper to extract fields from an ArrowSchema
1002-
static void ExtractSchemaFields(ArrowSchema *schema, vector<SchemaFieldRow> &field_rows) {
869+
// Helper to extract fields from an ArrowSchema using DuckDB's built-in type conversion
870+
static void ExtractSchemaFields(DBConfig &config, ArrowSchema *schema, vector<SchemaFieldRow> &field_rows) {
1003871
if (!schema) return;
1004872

1005873
for (int64_t i = 0; i < schema->n_children; i++) {
@@ -1008,9 +876,12 @@ static void ExtractSchemaFields(ArrowSchema *schema, vector<SchemaFieldRow> &fie
1008876

1009877
SchemaFieldRow row;
1010878
row.field_name = child->name ? child->name : "";
1011-
row.field_type = ArrowFormatToTypeName(child->format);
1012-
// In Arrow C Data Interface, nullable is indicated by absence of ARROW_FLAG_NULLABLE bit NOT being set
1013-
// flags & 2 means nullable (ARROW_FLAG_NULLABLE = 2)
879+
880+
// Use DuckDB's built-in Arrow type conversion
881+
auto arrow_type = duckdb::ArrowType::GetArrowLogicalType(config, *child);
882+
row.field_type = arrow_type->GetDuckType().ToString();
883+
884+
// In Arrow C Data Interface, nullable is indicated by ARROW_FLAG_NULLABLE bit (flags & 2)
1014885
row.nullable = (child->flags & 2) != 0;
1015886
field_rows.push_back(row);
1016887
}
@@ -1072,8 +943,8 @@ static unique_ptr<GlobalTableFunctionState> AdbcSchemaInitGlobal(ClientContext &
1072943
throw IOException("adbc_schema: Failed to get table schema: " + string(e.what()));
1073944
}
1074945

1075-
// Extract fields from the schema
1076-
ExtractSchemaFields(&schema, global_state->field_rows);
946+
// Extract fields from the schema using DuckDB's type conversion
947+
ExtractSchemaFields(DBConfig::GetConfig(context), &schema, global_state->field_rows);
1077948

1078949
// Release the schema
1079950
if (schema.release) {

0 commit comments

Comments
 (0)