Skip to content

Commit b2da25d

Browse files
committed
extend redshift schema queries with external tables metadata
1 parent c872c72 commit b2da25d

File tree

1 file changed

+86
-5
lines changed

1 file changed

+86
-5
lines changed

packages/cubejs-redshift-driver/src/RedshiftDriver.ts

Lines changed: 86 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
* @fileoverview The `RedshiftDriver` and related types declaration.
55
*/
66

7-
import { getEnv } from '@cubejs-backend/shared';
7+
import { assertDataSource, getEnv } from '@cubejs-backend/shared';
88
import { PostgresDriver, PostgresDriverConfiguration } from '@cubejs-backend/postgres-driver';
99
import {
1010
DownloadTableCSVData,
11-
DriverCapabilities,
11+
DriverCapabilities, QuerySchemasResult,
1212
StreamOptions,
13-
StreamTableDataWithTypes,
13+
StreamTableDataWithTypes, TablesSchema,
1414
UnloadOptions
1515
} from '@cubejs-backend/base-driver';
1616
import crypto from 'crypto';
@@ -40,10 +40,14 @@ export interface RedshiftDriverConfiguration extends PostgresDriverConfiguration
4040
exportBucket?: RedshiftDriverExportAWS;
4141
}
4242

43+
/**
 * Schema names that the schema and column listing code in this file leaves
 * out of its results.
 */
const IGNORED_SCHEMAS = [
  'pg_catalog',
  'pg_internal',
  'information_schema',
  'mysql',
  'performance_schema',
  'sys',
  'INFORMATION_SCHEMA',
];
44+
4345
/**
4446
* Redshift driver class.
4547
*/
4648
export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration> {
49+
private readonly dbName: string;
50+
4751
/**
4852
* Returns default concurrency value.
4953
*/
@@ -55,7 +59,7 @@ export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration>
5559
* Class constructor.
5660
*/
5761
public constructor(
58-
options: RedshiftDriverConfiguration & {
62+
config: RedshiftDriverConfiguration & {
5963
/**
6064
* Data source name.
6165
*/
@@ -73,7 +77,15 @@ export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration>
7377
testConnectionTimeout?: number,
7478
} = {}
7579
) {
76-
super(options);
80+
super(config);
81+
82+
const dataSource =
83+
config.dataSource ||
84+
assertDataSource('default');
85+
86+
// We need a DB name for querying external tables.
87+
// It's not possible to get it later from the pool
88+
this.dbName = getEnv('dbName', { dataSource });
7789
}
7890

7991
protected primaryKeysQuery() {
@@ -84,6 +96,75 @@ export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration>
8496
return null;
8597
}
8698

99+
/**
 * Builds the SQL statement that reads column name, table name, schema name
 * and data type from `information_schema.columns`, leaving out every schema
 * listed in `IGNORED_SCHEMAS`.
 *
 * @returns The SQL text of the query.
 * @override
 */
protected informationSchemaQuery() {
  // Render the ignored schemas as a comma-separated list of SQL string literals.
  const ignoredList = IGNORED_SCHEMAS.map(s => `'${s}'`).join(',');

  return `
      SELECT columns.column_name as ${this.quoteIdentifier('column_name')},
             columns.table_name as ${this.quoteIdentifier('table_name')},
             columns.table_schema as ${this.quoteIdentifier('table_schema')},
             columns.data_type as ${this.quoteIdentifier('data_type')}
      FROM information_schema.columns
      WHERE columns.table_schema NOT IN (${ignoredList})
   `;
}
112+
113+
/**
 * Collects column metadata for the tables visible to this driver.
 *
 * Redshift does not list external tables in the regular Postgres
 * information_schema, so they are looked up separately: every schema
 * returned by `getSchemas()` that has no entry in the information_schema
 * result is inspected with `SHOW TABLES` and then `SHOW COLUMNS` per table.
 *
 * @returns Column definitions keyed by schema name and then by table name.
 * @override
 */
public async tablesSchema(): Promise<TablesSchema> {
  const informationRows = await this.query(this.informationSchemaQuery(), []);
  const result = informationRows.reduce<TablesSchema>(this.informationColumnsSchemaReducer, {});

  // Schemas with no entry in the information_schema result are handled as external.
  const schemas = await this.getSchemas();
  const missingSchemas = schemas
    .map(s => s.schema_name)
    .filter(name => !result[name]);

  // NOTE(review): schema and table names are interpolated into the SHOW
  // statements unquoted - confirm this is acceptable for names that need quoting.
  for (const schemaName of missingSchemas) {
    result[schemaName] = {};

    // eslint-disable-next-line camelcase
    const tableRows: { table_name: string }[] = await this.query(`SHOW TABLES FROM SCHEMA ${this.dbName}.${schemaName}`, []);

    for (const tableRow of tableRows) {
      const tableName = tableRow.table_name;
      // eslint-disable-next-line camelcase
      const columnRows: { column_name: string, data_type: string }[] = await this.query(`SHOW COLUMNS FROM TABLE ${this.dbName}.${schemaName}.${tableName}`, []);

      result[schemaName][tableName] = columnRows.map(column => ({
        name: column.column_name,
        type: column.data_type,
        attributes: []
      }));
    }
  }

  return result;
}
143+
144+
/**
 * Builds the SQL statement that lists each schema name present in
 * `information_schema.tables` once, leaving out every schema listed in
 * `IGNORED_SCHEMAS`.
 *
 * @returns The SQL text of the query.
 * @override
 */
protected getSchemasQuery() {
  // Render the ignored schemas as a comma-separated list of SQL string literals.
  const ignoredList = IGNORED_SCHEMAS.map(s => `'${s}'`).join(',');

  return `
      SELECT table_schema as ${this.quoteIdentifier('schema_name')}
      FROM information_schema.tables
      WHERE table_schema NOT IN (${ignoredList})
      GROUP BY table_schema
    `;
}
155+
156+
/**
 * Lists the schemas of the configured database, without the ones named in
 * `IGNORED_SCHEMAS`.
 *
 * Syntax, from the Redshift docs:
 *   SHOW SCHEMAS FROM DATABASE database_name [LIKE 'filter_pattern'] [LIMIT row_limit ]
 * The statement returns regular schemas (queryable from information_schema)
 * as well as external ones.
 *
 * @returns The schema rows that are not ignored.
 * @override
 */
public async getSchemas(): Promise<QuerySchemasResult[]> {
  const rows = await this.query<QuerySchemasResult>(`SHOW SCHEMAS FROM DATABASE ${this.dbName}`, []);
  const isIgnored = (name: string) => IGNORED_SCHEMAS.includes(name);

  return rows.filter(row => !isIgnored(row.schema_name));
}
167+
87168
/**
88169
* @override
89170
*/

0 commit comments

Comments
 (0)