Skip to content

Commit a78048b

Browse files
committed
extend redshift schema queries with external tables metadata
1 parent c872c72 commit a78048b

File tree

1 file changed

+86
-3
lines changed

1 file changed

+86
-3
lines changed

packages/cubejs-redshift-driver/src/RedshiftDriver.ts

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@
44
* @fileoverview The `RedshiftDriver` and related types declaration.
55
*/
66

7-
import { getEnv } from '@cubejs-backend/shared';
7+
import { assertDataSource, getEnv } from '@cubejs-backend/shared';
88
import { PostgresDriver, PostgresDriverConfiguration } from '@cubejs-backend/postgres-driver';
99
import {
1010
DownloadTableCSVData,
1111
DriverCapabilities,
12+
QuerySchemasResult,
1213
StreamOptions,
1314
StreamTableDataWithTypes,
15+
TablesSchema,
1416
UnloadOptions
1517
} from '@cubejs-backend/base-driver';
1618
import crypto from 'crypto';
@@ -40,10 +42,14 @@ export interface RedshiftDriverConfiguration extends PostgresDriverConfiguration
4042
exportBucket?: RedshiftDriverExportAWS;
4143
}
4244

45+
/**
 * Schema names that are left out of the schema metadata produced by this
 * driver: they are excluded in the information_schema queries and filtered
 * from the `SHOW SCHEMAS` result.
 */
const IGNORED_SCHEMAS = [
  'pg_catalog',
  'pg_internal',
  'information_schema',
  'mysql',
  'performance_schema',
  'sys',
  'INFORMATION_SCHEMA',
];
46+
4347
/**
4448
* Redshift driver class.
4549
*/
4650
export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration> {
51+
private readonly dbName: string;
52+
4753
/**
4854
* Returns default concurrency value.
4955
*/
@@ -55,7 +61,7 @@ export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration>
5561
* Class constructor.
5662
*/
5763
public constructor(
58-
options: RedshiftDriverConfiguration & {
64+
config: RedshiftDriverConfiguration & {
5965
/**
6066
* Data source name.
6167
*/
@@ -73,7 +79,15 @@ export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration>
7379
testConnectionTimeout?: number,
7480
} = {}
7581
) {
76-
super(options);
82+
super(config);
83+
84+
const dataSource =
85+
config.dataSource ||
86+
assertDataSource('default');
87+
88+
// We need a DB name for querying external tables.
89+
// It's not possible to get it later from the pool
90+
this.dbName = getEnv('dbName', { dataSource });
7791
}
7892

7993
protected primaryKeysQuery() {
@@ -84,6 +98,75 @@ export class RedshiftDriver extends PostgresDriver<RedshiftDriverConfiguration>
8498
return null;
8599
}
86100

101+
/**
102+
* @override
103+
*/
104+
protected informationSchemaQuery() {
  // Render the ignore list as comma-separated SQL string literals.
  const excludedSchemas = IGNORED_SCHEMAS.map(s => `'${s}'`).join(',');
  // Every selected column is aliased through the driver's identifier quoting.
  const alias = (name: string) => this.quoteIdentifier(name);

  return `
    SELECT columns.column_name as ${alias('column_name')},
           columns.table_name as ${alias('table_name')},
           columns.table_schema as ${alias('table_schema')},
           columns.data_type as ${alias('data_type')}
    FROM information_schema.columns
    WHERE columns.table_schema NOT IN (${excludedSchemas})
  `;
}
114+
115+
/**
116+
* In Redshift, external tables are not listed in the regular Postgres information_schema,
117+
* so they need to be queried separately.
118+
* @override
119+
*/
120+
public async tablesSchema(): Promise<TablesSchema> {
  // Returns a schema -> table -> columns map that merges the rows of
  // informationSchemaQuery() with the tables of every schema that
  // getSchemas() reports but the information_schema result does not contain.

  // Fold the information_schema rows into the result map. The reducer is
  // called through an arrow function so it keeps its `this` binding instead
  // of being handed to `reduce` as a detached method reference.
  const rows = await this.query(this.informationSchemaQuery(), []);
  const tablesSchema = rows.reduce<TablesSchema>(
    (acc, row) => this.informationColumnsSchemaReducer(acc, row),
    {}
  );

  // Any schema without an entry so far is listed via SHOW TABLES below.
  const allSchemas = await this.getSchemas();
  const externalSchemas = allSchemas.filter(s => !tablesSchema[s.schema_name]).map(s => s.schema_name);

  for (const externalSchema of externalSchemas) {
    tablesSchema[externalSchema] = {};
    // NOTE(review): database, schema and table names are interpolated into
    // the SHOW statements without quoting; names that need quoting would
    // break these statements. Confirm how Redshift treats quoted identifiers
    // in SHOW TABLES / SHOW COLUMNS before changing this.
    // eslint-disable-next-line camelcase
    const tablesRes: { table_name: string }[] = await this.query(`SHOW TABLES FROM SCHEMA ${this.dbName}.${externalSchema}`, []);
    const tables = tablesRes.map(t => t.table_name);
    for (const tableName of tables) {
      // eslint-disable-next-line camelcase
      const columnRes: { column_name: string, data_type: string }[] = await this.query(`SHOW COLUMNS FROM TABLE ${this.dbName}.${externalSchema}.${tableName}`, []);
      // Columns found this way carry only a name and a type; attributes stay empty.
      tablesSchema[externalSchema][tableName] = columnRes.map(def => ({
        name: def.column_name,
        type: def.data_type,
        attributes: []
      }));
    }
  }

  return tablesSchema;
}
145+
146+
/**
147+
* @override
148+
*/
149+
protected getSchemasQuery() {
  // Render the ignore list as comma-separated SQL string literals.
  const excludedSchemas = IGNORED_SCHEMAS.map(s => `'${s}'`).join(',');
  const schemaNameAlias = this.quoteIdentifier('schema_name');

  // GROUP BY collapses the per-table rows into one row per schema.
  return `
    SELECT table_schema as ${schemaNameAlias}
    FROM information_schema.tables
    WHERE table_schema NOT IN (${excludedSchemas})
    GROUP BY table_schema
  `;
}
157+
158+
/**
159+
* From the Redshift docs:
160+
* SHOW SCHEMAS FROM DATABASE database_name [LIKE 'filter_pattern'] [LIMIT row_limit ]
161+
* It returns regular schemas (queryable from information_schema) and external ones.
162+
* @override
163+
*/
164+
public async getSchemas(): Promise<QuerySchemasResult[]> {
  // Ask the database itself for its schema list rather than information_schema.
  const listed = await this.query<QuerySchemasResult>(`SHOW SCHEMAS FROM DATABASE ${this.dbName}`, []);

  // Keep every schema whose name is not on the ignore list.
  const visible: QuerySchemasResult[] = [];
  for (const entry of listed) {
    if (!IGNORED_SCHEMAS.includes(entry.schema_name)) {
      visible.push(entry);
    }
  }

  return visible;
}
169+
87170
/**
88171
* @override
89172
*/

0 commit comments

Comments
 (0)