Skip to content

Commit e86213c

Browse files
Decouple decimal conversion from arrow metadata (#856)
* Decouple decimal conversion from arrow metadata
* test
* update changelog
* add more to columninfo

Co-authored-by: Samikshya Chand <[email protected]>
1 parent 6de1d93 commit e86213c

File tree

13 files changed

+228
-196
lines changed

13 files changed

+228
-196
lines changed

NEXT_CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@
1313

1414
### Fixed
1515
- Updated JDBC URL regex to accept valid connection strings that were incorrectly rejected.
16-
16+
- Updated decimal conversion logic to fix numeric values missing decimal precision.
1717
---
1818
*Note: When making changes, please add your change under the appropriate section with a brief description.*

src/main/java/com/databricks/jdbc/api/impl/DatabricksResultSetMetaData.java

Lines changed: 18 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
import static com.databricks.jdbc.common.DatabricksJdbcConstants.VOLUME_OPERATION_STATUS_COLUMN_NAME;
55
import static com.databricks.jdbc.common.MetadataResultConstants.LARGE_DISPLAY_COLUMNS;
66
import static com.databricks.jdbc.common.MetadataResultConstants.REMARKS_COLUMN;
7-
import static com.databricks.jdbc.common.util.DatabricksThriftUtil.getTypeFromTypeDesc;
8-
import static com.databricks.jdbc.common.util.DatabricksThriftUtil.getTypeTextFromTypeDesc;
7+
import static com.databricks.jdbc.common.util.DatabricksThriftUtil.*;
98
import static com.databricks.jdbc.common.util.DatabricksTypeUtil.TIMESTAMP;
109
import static com.databricks.jdbc.common.util.DatabricksTypeUtil.TIMESTAMP_NTZ;
1110
import static com.databricks.jdbc.common.util.DatabricksTypeUtil.VARIANT;
@@ -179,38 +178,37 @@ public DatabricksResultSetMetaData(
179178
for (int columnIndex = 0;
180179
columnIndex < resultManifest.getSchema().getColumnsSize();
181180
columnIndex++) {
182-
TColumnDesc columnInfo = resultManifest.getSchema().getColumns().get(columnIndex);
183-
ColumnInfoTypeName columnTypeName = getTypeFromTypeDesc(columnInfo.getTypeDesc());
184-
int columnType = DatabricksTypeUtil.getColumnType(columnTypeName);
185-
int[] precisionAndScale = getPrecisionAndScale(columnInfo, columnType);
181+
TColumnDesc columnDesc = resultManifest.getSchema().getColumns().get(columnIndex);
182+
ColumnInfo columnInfo = getColumnInfoFromTColumnDesc(columnDesc);
183+
int[] precisionAndScale = getPrecisionAndScale(columnInfo);
186184
int precision = precisionAndScale[0];
187185
int scale = precisionAndScale[1];
188186

189187
ImmutableDatabricksColumn.Builder columnBuilder = getColumnBuilder();
190188
columnBuilder
191-
.columnName(columnInfo.getColumnName())
192-
.columnTypeClassName(DatabricksTypeUtil.getColumnTypeClassName(columnTypeName))
193-
.columnType(columnType)
194-
.columnTypeText(
195-
getTypeTextFromTypeDesc(
196-
columnInfo
197-
.getTypeDesc())) // columnInfoTypeName does not have BIGINT, SMALLINT.
198-
// Extracting from thriftType in typeDesc
189+
.columnName(columnInfo.getName())
190+
.columnTypeClassName(
191+
DatabricksTypeUtil.getColumnTypeClassName(columnInfo.getTypeName()))
192+
.columnType(DatabricksTypeUtil.getColumnType(columnInfo.getTypeName()))
193+
.columnTypeText(getTypeTextFromTypeDesc(columnDesc.getTypeDesc()))
194+
// columnInfoTypeName does not have BIGINT, SMALLINT. Extracting from thriftType in
195+
// typeDesc
199196
.typePrecision(precision)
200197
.typeScale(scale)
201-
.displaySize(DatabricksTypeUtil.getDisplaySize(columnTypeName, precision, scale))
198+
.displaySize(
199+
DatabricksTypeUtil.getDisplaySize(columnInfo.getTypeName(), precision, scale))
202200
.isSearchable(true)
203201
.schemaName(null)
204202
.tableName(null)
205-
.isSigned(DatabricksTypeUtil.isSigned(columnTypeName));
203+
.isSigned(DatabricksTypeUtil.isSigned(columnInfo.getTypeName()));
206204
if (isVariantColumn(arrowMetadata, columnIndex)) {
207205
columnBuilder
208206
.columnTypeClassName("java.lang.String")
209207
.columnType(Types.OTHER)
210208
.columnTypeText(VARIANT);
211209
}
212210
columnsBuilder.add(columnBuilder.build());
213-
columnNameToIndexMap.putIfAbsent(columnInfo.getColumnName(), ++currIndex);
211+
columnNameToIndexMap.putIfAbsent(columnInfo.getName(), ++currIndex);
214212
}
215213
}
216214
}
@@ -628,20 +626,9 @@ public int[] getPrecisionAndScale(ColumnInfo columnInfo, int columnType) {
628626
return result;
629627
}
630628

631-
public int[] getPrecisionAndScale(TColumnDesc columnInfo, int columnType) {
632-
int[] result = getBasePrecisionAndScale(columnType, ctx);
633-
if (columnInfo.getTypeDesc() != null && columnInfo.getTypeDesc().getTypesSize() > 0) {
634-
TTypeEntry tTypeEntry = columnInfo.getTypeDesc().getTypes().get(0);
635-
if (tTypeEntry.isSetPrimitiveEntry()
636-
&& tTypeEntry.getPrimitiveEntry().isSetTypeQualifiers()
637-
&& tTypeEntry.getPrimitiveEntry().getTypeQualifiers().isSetQualifiers()) {
638-
Map<String, TTypeQualifierValue> qualifiers =
639-
tTypeEntry.getPrimitiveEntry().getTypeQualifiers().getQualifiers();
640-
result[0] = qualifiers.get("precision").getI32Value(); // precision
641-
result[1] = qualifiers.get("scale").getI32Value(); // scale
642-
}
643-
}
644-
return result;
629+
public int[] getPrecisionAndScale(ColumnInfo columnInfo) {
630+
return getPrecisionAndScale(
631+
columnInfo, DatabricksTypeUtil.getColumnType(columnInfo.getTypeName()));
645632
}
646633

647634
private boolean isLargeColumn(String columnName) {

src/main/java/com/databricks/jdbc/api/impl/arrow/ArrowResultChunk.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import com.databricks.jdbc.model.core.ExternalLink;
1919
import com.databricks.jdbc.model.telemetry.enums.DatabricksDriverErrorCode;
2020
import com.databricks.sdk.service.sql.BaseChunkInfo;
21+
import com.databricks.sdk.service.sql.ColumnInfo;
2122
import com.databricks.sdk.service.sql.ColumnInfoTypeName;
2223
import com.google.common.annotations.VisibleForTesting;
2324
import java.io.IOException;
@@ -195,12 +196,15 @@ boolean hasNextRow() {
195196

196197
/** Returns object in the current row at the specified columnIndex. */
197198
Object getColumnObjectAtCurrentRow(
198-
int columnIndex, ColumnInfoTypeName requiredType, String arrowMetadata)
199+
int columnIndex,
200+
ColumnInfoTypeName requiredType,
201+
String arrowMetadata,
202+
ColumnInfo columnInfo)
199203
throws DatabricksSQLException {
200204
ValueVector columnVector =
201205
this.resultChunk.getColumnVector(this.recordBatchCursorInChunk, columnIndex);
202206
return ArrowToJavaObjectConverter.convert(
203-
columnVector, this.rowCursorInRecordBatch, requiredType, arrowMetadata);
207+
columnVector, this.rowCursorInRecordBatch, requiredType, arrowMetadata, columnInfo);
204208
}
205209

206210
String getType(int columnIndex) {

src/main/java/com/databricks/jdbc/api/impl/arrow/ArrowStreamResult.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package com.databricks.jdbc.api.impl.arrow;
22

3-
import static com.databricks.jdbc.common.util.DatabricksThriftUtil.getTypeFromTypeDesc;
3+
import static com.databricks.jdbc.common.util.DatabricksThriftUtil.getColumnInfoFromTColumnDesc;
44

55
import com.databricks.jdbc.api.impl.ComplexDataTypeParser;
66
import com.databricks.jdbc.api.impl.IExecutionResult;
@@ -142,12 +142,13 @@ public Object getObject(int columnIndex) throws DatabricksSQLException {
142142

143143
Object result =
144144
chunkIterator.getColumnObjectAtCurrentRow(
145-
columnIndex, ColumnInfoTypeName.STRING, "STRING");
145+
columnIndex, ColumnInfoTypeName.STRING, "STRING", columnInfos.get(columnIndex));
146146
ComplexDataTypeParser parser = new ComplexDataTypeParser();
147147
return parser.formatComplexTypeString(result.toString(), requiredType.name(), arrowMetadata);
148148
}
149149

150-
return chunkIterator.getColumnObjectAtCurrentRow(columnIndex, requiredType, arrowMetadata);
150+
return chunkIterator.getColumnObjectAtCurrentRow(
151+
columnIndex, requiredType, arrowMetadata, columnInfos.get(columnIndex));
151152
}
152153

153154
/**
@@ -224,8 +225,8 @@ private void setColumnInfo(TGetResultSetMetadataResp resultManifest) {
224225
if (resultManifest.getSchema() == null) {
225226
return;
226227
}
227-
for (TColumnDesc columnInfo : resultManifest.getSchema().getColumns()) {
228-
columnInfos.add(new ColumnInfo().setTypeName(getTypeFromTypeDesc(columnInfo.getTypeDesc())));
228+
for (TColumnDesc tColumnDesc : resultManifest.getSchema().getColumns()) {
229+
columnInfos.add(getColumnInfoFromTColumnDesc(tColumnDesc));
229230
}
230231
}
231232
}

src/main/java/com/databricks/jdbc/api/impl/arrow/InlineChunkProvider.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,8 @@ private static Schema hiveSchemaToArrowSchema(TTableSchema hiveSchema)
199199
}
200200

201201
private static Field getArrowField(TColumnDesc columnDesc) throws SQLException {
202-
TTypeId thriftType = getThriftTypeFromTypeDesc(columnDesc.getTypeDesc());
203-
ArrowType arrowType = mapThriftToArrowType(thriftType);
202+
TPrimitiveTypeEntry primitiveTypeEntry = getTPrimitiveTypeOrDefault(columnDesc.getTypeDesc());
203+
ArrowType arrowType = mapThriftToArrowType(primitiveTypeEntry.getType());
204204
FieldType fieldType = new FieldType(true, arrowType, null);
205205
return new Field(columnDesc.getColumnName(), fieldType, null);
206206
}

src/main/java/com/databricks/jdbc/api/impl/converters/ArrowToJavaObjectConverter.java

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import com.databricks.jdbc.exception.DatabricksValidationException;
1313
import com.databricks.jdbc.log.JdbcLogger;
1414
import com.databricks.jdbc.log.JdbcLoggerFactory;
15+
import com.databricks.sdk.service.sql.ColumnInfo;
1516
import com.databricks.sdk.service.sql.ColumnInfoTypeName;
1617
import java.math.BigDecimal;
1718
import java.math.RoundingMode;
@@ -61,7 +62,8 @@ public static Object convert(
6162
ValueVector columnVector,
6263
int vectorIndex,
6364
ColumnInfoTypeName requiredType,
64-
String arrowMetadata)
65+
String arrowMetadata,
66+
ColumnInfo columnInfo)
6567
throws DatabricksSQLException {
6668
// check isNull before getting the object from the vector
6769
if (columnVector.isNull(vectorIndex)) {
@@ -102,7 +104,7 @@ public static Object convert(
102104
case DOUBLE:
103105
return convertToNumber(object, Double::parseDouble, Number::doubleValue);
104106
case DECIMAL:
105-
return convertToDecimal(object, arrowMetadata);
107+
return convertToDecimal(object, columnInfo);
106108
case BINARY:
107109
return convertToByteArray(object);
108110
case BOOLEAN:
@@ -262,25 +264,17 @@ private static byte[] convertToByteArray(Object object) {
262264
return (byte[]) object;
263265
}
264266

265-
static BigDecimal convertToDecimal(Object object, String arrowMetadata)
267+
static BigDecimal convertToDecimal(Object object, ColumnInfo columnInfo)
266268
throws DatabricksValidationException {
267-
if (object instanceof Text) {
268-
return new BigDecimal(object.toString());
269-
}
270-
int scale;
271-
try {
272-
scale =
273-
Integer.parseInt(
274-
arrowMetadata
275-
.substring(arrowMetadata.indexOf(',') + 1, arrowMetadata.indexOf(')'))
276-
.trim());
277-
} catch (Exception e) {
278-
LOGGER.error(
279-
e, "Failed to parse scale from arrow metadata: {}. Defaulting to scale 0", arrowMetadata);
280-
scale = 0;
281-
}
282-
if (object instanceof Number) {
283-
return new BigDecimal(object.toString()).setScale(scale, RoundingMode.HALF_UP);
269+
if (object instanceof Text || object instanceof Number) {
270+
BigDecimal bigDecimal = new BigDecimal(object.toString());
271+
Optional<Integer> bigDecimalScale =
272+
columnInfo.getTypeScale() != null
273+
? Optional.of(columnInfo.getTypeScale().intValue())
274+
: Optional.empty();
275+
return bigDecimalScale
276+
.map(scale -> bigDecimal.setScale(scale, RoundingMode.HALF_UP))
277+
.orElse(bigDecimal);
284278
}
285279
String errorMessage =
286280
String.format("Unsupported object type for decimal conversion: %s", object.getClass());

src/main/java/com/databricks/jdbc/common/util/DatabricksThriftUtil.java

Lines changed: 50 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import static com.databricks.jdbc.common.EnvironmentVariables.DEFAULT_RESULT_ROW_LIMIT;
44
import static com.databricks.jdbc.common.util.DatabricksTypeUtil.*;
5+
import static com.databricks.jdbc.model.client.thrift.generated.TTypeId.*;
56

67
import com.databricks.jdbc.api.internal.IDatabricksSession;
78
import com.databricks.jdbc.api.internal.IDatabricksStatementInternal;
@@ -14,13 +15,37 @@
1415
import com.databricks.jdbc.model.core.ExternalLink;
1516
import com.databricks.jdbc.model.core.StatementStatus;
1617
import com.databricks.jdbc.model.telemetry.enums.DatabricksDriverErrorCode;
18+
import com.databricks.sdk.service.sql.ColumnInfo;
1719
import com.databricks.sdk.service.sql.ColumnInfoTypeName;
1820
import com.databricks.sdk.service.sql.StatementState;
1921
import java.nio.ByteBuffer;
2022
import java.util.*;
2123

2224
public class DatabricksThriftUtil {
2325

26+
private static final Map<TTypeId, ColumnInfoTypeName> T_TYPE_ID_COLUMN_INFO_TYPE_NAME_MAP =
27+
Map.ofEntries(
28+
Map.entry(BOOLEAN_TYPE, ColumnInfoTypeName.BOOLEAN),
29+
Map.entry(TINYINT_TYPE, ColumnInfoTypeName.BYTE),
30+
Map.entry(SMALLINT_TYPE, ColumnInfoTypeName.SHORT),
31+
Map.entry(INT_TYPE, ColumnInfoTypeName.INT),
32+
Map.entry(BIGINT_TYPE, ColumnInfoTypeName.LONG),
33+
Map.entry(FLOAT_TYPE, ColumnInfoTypeName.FLOAT),
34+
Map.entry(DOUBLE_TYPE, ColumnInfoTypeName.DOUBLE),
35+
Map.entry(STRING_TYPE, ColumnInfoTypeName.STRING),
36+
Map.entry(VARCHAR_TYPE, ColumnInfoTypeName.STRING),
37+
Map.entry(TIMESTAMP_TYPE, ColumnInfoTypeName.TIMESTAMP),
38+
Map.entry(BINARY_TYPE, ColumnInfoTypeName.BINARY),
39+
Map.entry(DECIMAL_TYPE, ColumnInfoTypeName.DECIMAL),
40+
Map.entry(DATE_TYPE, ColumnInfoTypeName.DATE),
41+
Map.entry(CHAR_TYPE, ColumnInfoTypeName.CHAR),
42+
Map.entry(INTERVAL_YEAR_MONTH_TYPE, ColumnInfoTypeName.INTERVAL),
43+
Map.entry(INTERVAL_DAY_TIME_TYPE, ColumnInfoTypeName.INTERVAL),
44+
Map.entry(ARRAY_TYPE, ColumnInfoTypeName.ARRAY),
45+
Map.entry(MAP_TYPE, ColumnInfoTypeName.MAP),
46+
Map.entry(NULL_TYPE, ColumnInfoTypeName.STRING),
47+
Map.entry(STRUCT_TYPE, ColumnInfoTypeName.STRUCT));
48+
2449
private static final JdbcLogger LOGGER = JdbcLoggerFactory.getLogger(DatabricksThriftUtil.class);
2550
private static final List<TStatusCode> SUCCESS_STATUS_LIST =
2651
List.of(TStatusCode.SUCCESS_STATUS, TStatusCode.SUCCESS_WITH_INFO_STATUS);
@@ -148,49 +173,34 @@ public static StatementStatus getAsyncStatus(TStatus status) {
148173
}
149174

150175
public static String getTypeTextFromTypeDesc(TTypeDesc typeDesc) {
151-
TTypeId type = getThriftTypeFromTypeDesc(typeDesc);
152-
return type.name().replace("_TYPE", "");
176+
TPrimitiveTypeEntry primitiveTypeEntry = getTPrimitiveTypeOrDefault(typeDesc);
177+
return primitiveTypeEntry.getType().name().replace("_TYPE", "");
153178
}
154179

155-
public static ColumnInfoTypeName getTypeFromTypeDesc(TTypeDesc typeDesc) {
156-
TTypeId type = getThriftTypeFromTypeDesc(typeDesc);
157-
switch (type) {
158-
case BOOLEAN_TYPE:
159-
return ColumnInfoTypeName.BOOLEAN;
160-
case TINYINT_TYPE:
161-
return ColumnInfoTypeName.BYTE;
162-
case SMALLINT_TYPE:
163-
return ColumnInfoTypeName.SHORT;
164-
case INT_TYPE:
165-
return ColumnInfoTypeName.INT;
166-
case BIGINT_TYPE:
167-
return ColumnInfoTypeName.LONG;
168-
case FLOAT_TYPE:
169-
return ColumnInfoTypeName.FLOAT;
170-
case DOUBLE_TYPE:
171-
return ColumnInfoTypeName.DOUBLE;
172-
case TIMESTAMP_TYPE:
173-
return ColumnInfoTypeName.TIMESTAMP;
174-
case BINARY_TYPE:
175-
return ColumnInfoTypeName.BINARY;
176-
case DECIMAL_TYPE:
177-
return ColumnInfoTypeName.DECIMAL;
178-
case DATE_TYPE:
179-
return ColumnInfoTypeName.DATE;
180-
case CHAR_TYPE:
181-
return ColumnInfoTypeName.CHAR;
182-
case INTERVAL_YEAR_MONTH_TYPE:
183-
case INTERVAL_DAY_TIME_TYPE:
184-
return ColumnInfoTypeName.INTERVAL;
185-
case ARRAY_TYPE:
186-
return ColumnInfoTypeName.ARRAY;
187-
case MAP_TYPE:
188-
return ColumnInfoTypeName.MAP;
189-
case STRUCT_TYPE:
190-
return ColumnInfoTypeName.STRUCT;
191-
default:
192-
return ColumnInfoTypeName.STRING;
180+
public static ColumnInfo getColumnInfoFromTColumnDesc(TColumnDesc columnDesc) {
181+
TPrimitiveTypeEntry primitiveTypeEntry = getTPrimitiveTypeOrDefault(columnDesc.getTypeDesc());
182+
ColumnInfoTypeName columnInfoTypeName =
183+
T_TYPE_ID_COLUMN_INFO_TYPE_NAME_MAP.get(primitiveTypeEntry.getType());
184+
ColumnInfo columnInfo =
185+
new ColumnInfo()
186+
.setName(columnDesc.getColumnName())
187+
.setPosition((long) columnDesc.getPosition())
188+
.setTypeName(columnInfoTypeName)
189+
.setTypeText(getTypeTextFromTypeDesc(columnDesc.getTypeDesc()));
190+
if (primitiveTypeEntry.isSetTypeQualifiers()) {
191+
TTypeQualifiers typeQualifiers = primitiveTypeEntry.getTypeQualifiers();
192+
String scaleQualifierKey = TCLIServiceConstants.SCALE,
193+
precisionQualifierKey = TCLIServiceConstants.PRECISION;
194+
if (typeQualifiers.getQualifiers().get(scaleQualifierKey) != null) {
195+
columnInfo.setTypeScale(
196+
(long) typeQualifiers.getQualifiers().get(scaleQualifierKey).getI32Value());
197+
}
198+
if (typeQualifiers.getQualifiers().get(precisionQualifierKey) != null) {
199+
columnInfo.setTypePrecision(
200+
(long) typeQualifiers.getQualifiers().get(precisionQualifierKey).getI32Value());
201+
}
193202
}
203+
return columnInfo;
194204
}
195205

196206
/**

src/main/java/com/databricks/jdbc/common/util/DatabricksTypeUtil.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -444,13 +444,13 @@ public static String inferDatabricksType(Object obj) {
444444
return type;
445445
}
446446

447-
public static TTypeId getThriftTypeFromTypeDesc(TTypeDesc typeDesc) {
447+
public static TPrimitiveTypeEntry getTPrimitiveTypeOrDefault(TTypeDesc typeDesc) {
448+
TPrimitiveTypeEntry defaultPrimitiveTypeEntry = new TPrimitiveTypeEntry(TTypeId.STRING_TYPE);
448449
return Optional.ofNullable(typeDesc)
449450
.map(TTypeDesc::getTypes)
450451
.map(t -> t.get(0))
451452
.map(TTypeEntry::getPrimitiveEntry)
452-
.map(TPrimitiveTypeEntry::getType)
453-
.orElse(TTypeId.STRING_TYPE);
453+
.orElse(defaultPrimitiveTypeEntry);
454454
}
455455

456456
public static ArrowType mapThriftToArrowType(TTypeId typeId) throws SQLException {

0 commit comments

Comments (0)