Skip to content

Commit 4a60975

Browse files
authored
feat(plugin-oracle): Implemented getTableStatistics for oracle connector (#26120)
## Description There is a separate PR enabling tests, once that is in - we can work on the tests. Following unsolved problems exist: 1. Histogram implementation. ## Motivation and Context Stats can improve the plans involving Oracle connector. ## Impact <!---Describe any public API or user-facing feature change or any performance impact--> ## Test Plan <!---Please fill in how you tested your change--> ## Contributor checklist - [ ] Please make sure your submission complies with our [contributing guide](https://github.com/prestodb/presto/blob/master/CONTRIBUTING.md), in particular [code style](https://github.com/prestodb/presto/blob/master/CONTRIBUTING.md#code-style) and [commit standards](https://github.com/prestodb/presto/blob/master/CONTRIBUTING.md#commit-standards). - [ ] PR description addresses the issue accurately and concisely. If the change is non-trivial, a GitHub Issue is referenced. - [ ] Documented new properties (with its default value), SQL syntax, functions, or other functionality. - [ ] If release notes are required, they follow the [release notes guidelines](https://github.com/prestodb/presto/wiki/Release-Notes-Guidelines). - [ ] Adequate tests were added if applicable. - [ ] CI passed. ``` == RELEASE NOTES == Oracle Connector changes * Add : Implementation to fetch table stats from source tables ```
1 parent d1000c8 commit 4a60975

File tree

1 file changed

+110
-0
lines changed

1 file changed

+110
-0
lines changed

presto-oracle/src/main/java/com/facebook/presto/plugin/oracle/OracleClient.java

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,26 +13,40 @@
1313
*/
1414
package com.facebook.presto.plugin.oracle;
1515

16+
import com.facebook.airlift.log.Logger;
17+
import com.facebook.presto.common.predicate.TupleDomain;
1618
import com.facebook.presto.common.type.Decimals;
1719
import com.facebook.presto.common.type.VarcharType;
1820
import com.facebook.presto.plugin.jdbc.BaseJdbcClient;
1921
import com.facebook.presto.plugin.jdbc.BaseJdbcConfig;
2022
import com.facebook.presto.plugin.jdbc.ConnectionFactory;
23+
import com.facebook.presto.plugin.jdbc.JdbcColumnHandle;
2124
import com.facebook.presto.plugin.jdbc.JdbcConnectorId;
2225
import com.facebook.presto.plugin.jdbc.JdbcIdentity;
26+
import com.facebook.presto.plugin.jdbc.JdbcTableHandle;
2327
import com.facebook.presto.plugin.jdbc.JdbcTypeHandle;
2428
import com.facebook.presto.plugin.jdbc.mapping.ReadMapping;
29+
import com.facebook.presto.spi.ColumnHandle;
2530
import com.facebook.presto.spi.ConnectorSession;
2631
import com.facebook.presto.spi.PrestoException;
2732
import com.facebook.presto.spi.SchemaTableName;
33+
import com.facebook.presto.spi.statistics.ColumnStatistics;
34+
import com.facebook.presto.spi.statistics.DoubleRange;
35+
import com.facebook.presto.spi.statistics.Estimate;
36+
import com.facebook.presto.spi.statistics.TableStatistics;
37+
import com.google.common.collect.Maps;
2838
import jakarta.inject.Inject;
2939

3040
import java.sql.Connection;
3141
import java.sql.DatabaseMetaData;
42+
import java.sql.Date;
3243
import java.sql.PreparedStatement;
3344
import java.sql.ResultSet;
3445
import java.sql.SQLException;
3546
import java.sql.Types;
47+
import java.util.HashMap;
48+
import java.util.List;
49+
import java.util.Map;
3650
import java.util.Optional;
3751

3852
import static com.facebook.presto.common.type.DecimalType.createDecimalType;
@@ -47,13 +61,15 @@
4761
import static com.facebook.presto.plugin.jdbc.mapping.StandardColumnMappings.varbinaryReadMapping;
4862
import static com.facebook.presto.plugin.jdbc.mapping.StandardColumnMappings.varcharReadMapping;
4963
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
64+
import static java.lang.Double.NaN;
5065
import static java.lang.String.format;
5166
import static java.util.Locale.ENGLISH;
5267
import static java.util.Objects.requireNonNull;
5368

5469
public class OracleClient
5570
extends BaseJdbcClient
5671
{
72+
private static final Logger LOG = Logger.get(OracleClient.class);
5773
private static final int FETCH_SIZE = 1000;
5874

5975
private final boolean synonymsEnabled;
@@ -93,6 +109,7 @@ protected ResultSet getTables(Connection connection, Optional<String> schemaName
93109
escapeNamePattern(tableName, Optional.of(escape)).orElse(null),
94110
getTableTypes());
95111
}
112+
96113
@Override
97114
public PreparedStatement getPreparedStatement(ConnectorSession session, Connection connection, String sql)
98115
throws SQLException
@@ -137,6 +154,99 @@ protected void renameTable(JdbcIdentity identity, String catalogName, SchemaTabl
137154
}
138155
}
139156

157+
@Override
158+
public TableStatistics getTableStatistics(ConnectorSession session, JdbcTableHandle handle, List<JdbcColumnHandle> columnHandles, TupleDomain<ColumnHandle> tupleDomain)
159+
{
160+
try {
161+
requireNonNull(handle.getSchemaName(), "schema name is null");
162+
requireNonNull(handle.getTableName(), "table name is null");
163+
String sql = format(
164+
"SELECT NUM_ROWS, AVG_ROW_LEN, LAST_ANALYZED\n" +
165+
"FROM DBA_TAB_STATISTICS\n" +
166+
"WHERE OWNER='%s'\n" +
167+
"AND TABLE_NAME='%s'",
168+
handle.getSchemaName().toUpperCase(), handle.getTableName().toUpperCase());
169+
try (Connection connection = connectionFactory.openConnection(JdbcIdentity.from(session));
170+
PreparedStatement preparedStatement = getPreparedStatement(session, connection, sql);
171+
PreparedStatement preparedStatementCol = getPreparedStatement(session, connection, getColumnStaticsSql(handle));
172+
ResultSet resultSet = preparedStatement.executeQuery();
173+
ResultSet resultSetColumnStats = preparedStatementCol.executeQuery()) {
174+
if (!resultSet.next()) {
175+
LOG.debug("Stats not found for table : %s.%s", handle.getSchemaName(), handle.getTableName());
176+
return TableStatistics.empty();
177+
}
178+
double numRows = resultSet.getDouble("NUM_ROWS");
179+
// double avgRowLen = resultSet.getDouble("AVG_ROW_LEN");
180+
Date lastAnalyzed = resultSet.getDate("LAST_ANALYZED");
181+
182+
Map<ColumnHandle, ColumnStatistics> columnStatisticsMap = new HashMap<>();
183+
Map<String, JdbcColumnHandle> columnHandleMap = Maps.uniqueIndex(columnHandles, JdbcColumnHandle::getColumnName);
184+
while (resultSetColumnStats.next() && numRows > 0) {
185+
String columnName = resultSetColumnStats.getString("COLUMN_NAME");
186+
double nullsCount = resultSetColumnStats.getDouble("NUM_NULLS");
187+
double ndv = resultSetColumnStats.getDouble("NUM_DISTINCT");
188+
// Oracle stores low and high values as RAW(1000) i.e. a byte array. No way to unwrap it, without a clue about the underlying type
189+
// So we use column type as a clue and parse to double by converting as string first.
190+
double lowValue = toDouble(resultSetColumnStats.getString("LOW_VALUE"));
191+
double highValue = toDouble(resultSetColumnStats.getString("HIGH_VALUE"));
192+
ColumnStatistics.Builder columnStatisticsBuilder = ColumnStatistics.builder()
193+
.setDataSize(Estimate.estimateFromDouble(resultSet.getDouble("DATA_LENGTH")))
194+
.setNullsFraction(Estimate.estimateFromDouble(nullsCount / numRows))
195+
.setDistinctValuesCount(Estimate.estimateFromDouble(ndv));
196+
ColumnStatistics columnStatistics = columnStatisticsBuilder.build();
197+
if (Double.isFinite(lowValue) && Double.isFinite(highValue)) {
198+
columnStatistics = columnStatisticsBuilder.setRange(new DoubleRange(lowValue, highValue)).build();
199+
}
200+
columnStatisticsMap.put(columnHandleMap.get(columnName), columnStatistics);
201+
}
202+
LOG.info("getTableStatics for table: %s.%s.%s with last analyzed: %s",
203+
handle.getCatalogName(), handle.getSchemaName(), handle.getTableName(), lastAnalyzed);
204+
return TableStatistics.builder()
205+
.setColumnStatistics(columnStatisticsMap)
206+
.setRowCount(Estimate.estimateFromDouble(numRows)).build();
207+
}
208+
}
209+
catch (SQLException | RuntimeException e) {
210+
throw new PrestoException(JDBC_ERROR, "Failed fetching statistics for table: " + handle, e);
211+
}
212+
}
213+
214+
private String getColumnStaticsSql(JdbcTableHandle handle)
215+
{
216+
// UTL_RAW.CAST_TO_BINARY_X does not render correctly so those types are not supported.
217+
return format(
218+
"SELECT COLUMN_NAME,\n" +
219+
"DATA_TYPE,\n" +
220+
"DATA_LENGTH,\n" +
221+
"NUM_NULLS,\n" +
222+
"NUM_DISTINCT,\n" +
223+
"DENSITY,\n" +
224+
"CASE DATA_TYPE\n" +
225+
" WHEN 'NUMBER' THEN TO_CHAR(UTL_RAW.CAST_TO_NUMBER(LOW_VALUE))\n" +
226+
" ELSE NULL\n" +
227+
"END AS LOW_VALUE,\n" +
228+
"CASE DATA_TYPE\n" +
229+
" WHEN 'NUMBER' THEN TO_CHAR(UTL_RAW.CAST_TO_NUMBER(HIGH_VALUE))\n" +
230+
" ELSE NULL\n" +
231+
"END AS HIGH_VALUE\n" +
232+
"FROM ALL_TAB_COLUMNS\n" +
233+
"WHERE OWNER = '%s'\n" +
234+
" AND TABLE_NAME = '%s'", handle.getSchemaName().toUpperCase(), handle.getTableName().toUpperCase());
235+
}
236+
237+
private double toDouble(String number)
238+
{
239+
try {
240+
return Double.parseDouble(number);
241+
}
242+
catch (Exception e) {
243+
// a string represented by number, may not even be a parseable number this is expected. e.g. if column type is
244+
// varchar.
245+
LOG.debug(e, "error while decoding : %s", number);
246+
}
247+
return NaN;
248+
}
249+
140250
@Override
141251
public Optional<ReadMapping> toPrestoType(ConnectorSession session, JdbcTypeHandle typeHandle)
142252
{

0 commit comments

Comments
 (0)