Skip to content

Commit 7a14430

Browse files
committed
extend redshift schema queries with external tables metadata
1 parent b27ab22 commit 7a14430

File tree

1 file changed

+117
-3
lines changed

1 file changed

+117
-3
lines changed

packages/cubejs-redshift-driver/src/RedshiftDriver.ts

Lines changed: 117 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,15 @@
44
* @fileoverview The `RedshiftDriver` and related types declaration.
55
*/
66

7-
import { getEnv } from '@cubejs-backend/shared';
7+
import { assertDataSource, getEnv } from '@cubejs-backend/shared';
88
import { PostgresDriver, PostgresDriverConfiguration } from '@cubejs-backend/postgres-driver';
99
import {
10+
DatabaseStructure,
1011
DownloadTableCSVData,
1112
DriverCapabilities,
13+
QueryColumnsResult,
14+
QuerySchemasResult,
15+
QueryTablesResult,
1216
StreamOptions,
1317
StreamTableDataWithTypes,
1418
UnloadOptions
@@ -40,10 +44,14 @@ export interface RedshiftDriverConfiguration extends PostgresDriverConfiguration
4044
exportBucket?: RedshiftDriverExportAWS;
4145
}
4246

47+
/**
 * Schema names that this file's schema and column queries filter out
 * (compared as exact, case-sensitive strings).
 */
const IGNORED_SCHEMAS = [
  'pg_catalog',
  'pg_internal',
  'information_schema',
  'mysql',
  'performance_schema',
  'sys',
  'INFORMATION_SCHEMA',
];
48+
4349
/**
4450
* Redshift driver class.
4551
*/
4652
export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration> {
53+
private readonly dbName: string;
54+
4755
/**
4856
* Returns default concurrency value.
4957
*/
@@ -55,7 +63,7 @@ export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration>
5563
* Class constructor.
5664
*/
5765
public constructor(
58-
options: RedshiftDriverConfiguration & {
66+
config: RedshiftDriverConfiguration & {
5967
/**
6068
* Data source name.
6169
*/
@@ -73,7 +81,15 @@ export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration>
7381
testConnectionTimeout?: number,
7482
} = {}
7583
) {
76-
super(options);
84+
super(config);
85+
86+
const dataSource =
87+
config.dataSource ||
88+
assertDataSource('default');
89+
90+
// We need a DB name for querying external tables.
91+
// It's not possible to get it later from the pool
92+
this.dbName = getEnv('dbName', { dataSource });
7793
}
7894

7995
protected primaryKeysQuery() {
@@ -84,6 +100,104 @@ export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration>
84100
return null;
85101
}
86102

103+
/**
 * Builds the SQL text that selects column name, table name, table schema and
 * data type from `information_schema.columns`, leaving out every schema
 * listed in `IGNORED_SCHEMAS`. Output aliases go through `quoteIdentifier`.
 *
 * @returns the SQL statement text.
 * @override
 */
protected informationSchemaQuery() {
  const alias = (name: string) => this.quoteIdentifier(name);
  const ignoredList = IGNORED_SCHEMAS.map(schema => `'${schema}'`).join(',');

  // Leading/trailing empty entries reproduce the newline that opens and
  // closes the statement text.
  return [
    '',
    `SELECT columns.column_name as ${alias('column_name')},`,
    `columns.table_name as ${alias('table_name')},`,
    `columns.table_schema as ${alias('table_schema')},`,
    `columns.data_type as ${alias('data_type')}`,
    'FROM information_schema.columns',
    `WHERE columns.table_schema NOT IN (${ignoredList})`,
    '',
  ].join('\n');
}
116+
117+
/**
 * Collects the database structure in two passes.
 *
 * In Redshift external tables are not shown in regular Postgres information_schema,
 * so they need to be queried separately: first the information-schema query
 * is reduced into a structure, then every schema returned by `getSchemas()`
 * that is absent from that structure is described table by table.
 *
 * @returns structure keyed by schema name, then by table name.
 * @override
 */
public async tablesSchema(): Promise<DatabaseStructure> {
  const rows = await this.query(this.informationSchemaQuery(), []);
  const structure = rows.reduce<DatabaseStructure>(this.informationColumnsSchemaReducer, {});

  // Schemas missing from the first pass are the ones to describe separately.
  const externalSchemas: string[] = [];
  for (const { schema_name: schemaName } of await this.getSchemas()) {
    if (!structure[schemaName]) {
      externalSchemas.push(schemaName);
    }
  }

  // Queries are issued one at a time, in listing order.
  for (const schemaName of externalSchemas) {
    structure[schemaName] = {};
    const externalTables = await this.tablesForExternalSchema(schemaName);
    const tableNames = externalTables.map(({ table_name: tableName }) => tableName);

    for (const tableName of tableNames) {
      const columnDefs = await this.columnsForExternalTable(schemaName, tableName);
      structure[schemaName][tableName] = columnDefs.map(({ column_name: name, data_type: type }) => ({
        name,
        type,
        attributes: []
      }));
    }
  }

  return structure;
}
145+
146+
/**
 * Lists the tables of one schema with a `SHOW TABLES FROM SCHEMA` statement.
 *
 * NOTE(review): `dbName` and `schemaName` are interpolated into the statement
 * as-is, without identifier quoting.
 *
 * @param schemaName schema whose tables are listed.
 * @returns the rows produced by the statement.
 */
// eslint-disable-next-line camelcase
private async tablesForExternalSchema(schemaName: string): Promise<{ table_name: string }[]> {
  const statement = `SHOW TABLES FROM SCHEMA ${this.dbName}.${schemaName}`;

  return this.query(statement, []);
}
150+
151+
/**
 * Describes the columns of one table with a `SHOW COLUMNS FROM TABLE` statement.
 *
 * NOTE(review): `dbName`, `schemaName` and `tableName` are interpolated into
 * the statement as-is, without identifier quoting.
 *
 * @param schemaName schema that holds the table.
 * @param tableName table to describe.
 * @returns the rows produced by the statement.
 */
private async columnsForExternalTable(schemaName: string, tableName: string): Promise<QueryColumnsResult[]> {
  const statement = `SHOW COLUMNS FROM TABLE ${this.dbName}.${schemaName}.${tableName}`;

  return this.query(statement, []);
}
154+
155+
/**
 * Builds the SQL text that lists the distinct `table_schema` values of
 * `information_schema.tables` (grouped by schema), leaving out every schema
 * listed in `IGNORED_SCHEMAS`. The output alias goes through `quoteIdentifier`.
 *
 * @returns the SQL statement text.
 * @override
 */
protected getSchemasQuery() {
  const schemaAlias = this.quoteIdentifier('schema_name');
  const ignoredList = IGNORED_SCHEMAS.map(schema => `'${schema}'`).join(',');

  // Leading/trailing empty entries reproduce the newline that opens and
  // closes the statement text.
  return [
    '',
    `SELECT table_schema as ${schemaAlias}`,
    'FROM information_schema.tables',
    `WHERE table_schema NOT IN (${ignoredList})`,
    'GROUP BY table_schema',
    '',
  ].join('\n');
}
166+
167+
/**
 * Lists the schemas of the database named by `dbName`, minus the entries of
 * `IGNORED_SCHEMAS`.
 *
 * From the Redshift docs:
 * SHOW SCHEMAS FROM DATABASE database_name [LIKE 'filter_pattern'] [LIMIT row_limit ]
 * It returns regular schemas (queryable from information_schema) and external ones.
 *
 * @returns the remaining rows, in the order the statement returned them.
 * @override
 */
public async getSchemas(): Promise<QuerySchemasResult[]> {
  const listed = await this.query<QuerySchemasResult>(`SHOW SCHEMAS FROM DATABASE ${this.dbName}`, []);

  const visible: QuerySchemasResult[] = [];
  for (const entry of listed) {
    if (!IGNORED_SCHEMAS.includes(entry.schema_name)) {
      visible.push(entry);
    }
  }

  return visible;
}
178+
179+
/**
 * Returns column descriptions for the requested tables.
 *
 * The parent implementation is asked first. Any requested table for which it
 * returned no column (the original comment attributes this to external
 * tables) is then described via `columnsForExternalTable`, and those rows are
 * appended to the same result array.
 *
 * @param tables tables to describe, identified by schema and table name.
 * @returns the parent's rows followed by the rows of the tables it missed.
 */
public async getColumnsForSpecificTables(tables: QueryTablesResult[]): Promise<QueryColumnsResult[]> {
  const columns = await super.getColumnsForSpecificTables(tables);

  // A table counts as described once at least one returned column belongs to it.
  const isDescribed = (table: QueryTablesResult) => columns.some(
    column => column.schema_name === table.schema_name && column.table_name === table.table_name
  );

  // Computed up front, before any rows are appended to `columns`.
  const pending = tables.filter(table => !isDescribed(table));

  for (const { schema_name: schemaName, table_name: tableName } of pending) {
    const described = await this.columnsForExternalTable(schemaName, tableName);
    for (const col of described) {
      // Copy only the four fields of interest from each returned row.
      columns.push({
        schema_name: col.schema_name,
        table_name: col.table_name,
        column_name: col.column_name,
        data_type: col.data_type,
      });
    }
  }

  return columns;
}
200+
87201
/**
88202
* @override
89203
*/

0 commit comments

Comments
 (0)