Skip to content

Commit e724f60

Browse files
HCK-9133: vector RE (#131)
* feat: vectors RE from instance * feat: vectors RE from DDL
1 parent 048c4e2 commit e724f60

File tree

5 files changed

+114
-36
lines changed

5 files changed

+114
-36
lines changed

forward_engineering/ddlProvider/ddlHelpers/columnDefinitionHelper.js

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ const _ = require('lodash');
22
const { commentIfDeactivated, wrapInQuotes, wrapComment } = require('../../utils/general');
33
const assignTemplates = require('../../utils/assignTemplates');
44
const templates = require('../templates');
5+
const { isVector, isString, isDateTime } = require('./typeHelper');
56

67
const addLength = (type, length) => {
78
return `${type}(${length})`;
@@ -58,8 +59,6 @@ const canHaveTimePrecision = type => ['time', 'timestamp'].includes(type);
5859
const canHaveScale = type => type === 'numeric';
5960
const canHaveTypeModifier = type => ['geography', 'geometry'].includes(type);
6061

61-
const isVector = type => ['vector', 'halfvec', 'sparsevec'].includes(type);
62-
6362
const decorateType = (type, columnDefinition) => {
6463
const { length, precision, scale, typeModifier, srid, timezone, timePrecision, dimension, subtype, array_type } =
6564
columnDefinition;
@@ -81,9 +80,6 @@ const decorateType = (type, columnDefinition) => {
8180
return addArrayDecorator(type, array_type);
8281
};
8382

84-
const isString = type => ['char', 'varchar', 'text', 'bit', 'varbit'].includes(type);
85-
const isDateTime = type => ['date', 'time', 'timestamp', 'interval'].includes(type);
86-
8783
const decorateDefault = (type, defaultValue, isArrayType) => {
8884
const constantsValues = ['current_timestamp', 'current_user', 'null'];
8985
if ((isString(type) || isDateTime(type)) && !constantsValues.includes(_.toLower(defaultValue)) && !isArrayType) {
forward_engineering/ddlProvider/ddlHelpers/typeHelper.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/**
 * Checks whether a type name belongs to the string-like group:
 * char, varchar, text, bit or varbit.
 * The comparison is exact, so the name must match one of these values as written.
 *
 * @param {string} type - type name to test
 * @returns {boolean} true when the type is in the group
 */
const isString = type => {
	const stringTypes = ['char', 'varchar', 'text', 'bit', 'varbit'];

	return stringTypes.includes(type);
};
2+
3+
/**
 * Checks whether a type name belongs to the date/time group:
 * date, time, timestamp or interval.
 * The comparison is exact, so the name must match one of these values as written.
 *
 * @param {string} type - type name to test
 * @returns {boolean} true when the type is in the group
 */
const isDateTime = type => {
	const dateTimeTypes = ['date', 'time', 'timestamp', 'interval'];

	return dateTimeTypes.includes(type);
};
4+
5+
/**
 * Checks whether a type name belongs to the vector group:
 * vector, halfvec or sparsevec.
 * The comparison is exact, so the name must match one of these values as written.
 *
 * @param {string} type - type name to test
 * @returns {boolean} true when the type is in the group
 */
const isVector = type => {
	const vectorTypes = ['vector', 'halfvec', 'sparsevec'];

	return vectorTypes.includes(type);
};
6+
7+
module.exports = {
8+
isString,
9+
isDateTime,
10+
isVector,
11+
};

reverse_engineering/helpers/getJsonSchema.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
const _ = require('lodash');
2+
const { isVector } = require('../../forward_engineering/ddlProvider/ddlHelpers/typeHelper');
3+
14
const getJsonSchema = columns => {
25
const properties = columns.reduce((properties, column) => {
36
if (column.properties) {
@@ -10,6 +13,16 @@ const getJsonSchema = columns => {
1013
};
1114
}
1215

16+
if (isVector(column.type)) {
17+
return {
18+
...properties,
19+
[column.name]: {
20+
...column,
21+
items: _.fill(Array(column.dimension), { type: 'number', mode: 'real' }),
22+
},
23+
};
24+
}
25+
1326
return {
1427
...properties,
1528
[column.name]: column,

reverse_engineering/helpers/postgresHelpers/columnHelper.js

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
const _ = require('lodash');
2+
const { isVector } = require('../../../forward_engineering/ddlProvider/ddlHelpers/typeHelper');
23

34
const columnPropertiesMapper = {
45
column_default: 'default',
@@ -15,8 +16,21 @@ const columnPropertiesMapper = {
1516
numeric_scale: 'scale',
1617
datetime_precision: 'timePrecision',
1718
attribute_mode: {
18-
keyword: 'timePrecision',
19-
check: (column, value) => value !== -1 && canHaveTimePrecision(column.data_type),
19+
keyword: ({ column }) => {
20+
if (isVector(column.udt_name)) {
21+
return 'dimension';
22+
}
23+
return 'timePrecision';
24+
},
25+
check: (column, value) => {
26+
if (!value || value === -1) {
27+
return false;
28+
}
29+
if (isVector(column.udt_name)) {
30+
return true;
31+
}
32+
return canHaveTimePrecision(column.data_type);
33+
},
2034
},
2135
interval_type: 'intervalOptions',
2236
collation_name: 'collationRule',
@@ -28,6 +42,8 @@ const columnPropertiesMapper = {
2842
domain_name: 'domain_name',
2943
};
3044

45+
const keysToExclude = ['numberOfArrayDimensions', 'udt_name'];
46+
3147
const getColumnValue = (column, key, value) => {
3248
if (columnPropertiesMapper[key]?.check) {
3349
return columnPropertiesMapper[key].check(column, value) ? value : '';
@@ -36,16 +52,28 @@ const getColumnValue = (column, key, value) => {
3652
return _.get(columnPropertiesMapper, `${key}.values.${value}`, value);
3753
};
3854

55+
/**
 * Resolves the output property name for a raw column key through columnPropertiesMapper.
 *
 * A mapper entry may be a plain value, or an object carrying a `keyword` that is either
 * a fixed name or a function computing the name from the column.
 *
 * @param {{ column: object, key: string }} params - the raw column and the key being mapped
 * @returns {*} the resolved keyword, or the mapper entry itself (undefined when the key is not mapped)
 */
const getColumnKey = ({ column, key }) => {
	const mapping = columnPropertiesMapper[key];

	// No keyword on the entry: the entry itself (or undefined) is the result.
	if (!mapping?.keyword) {
		return mapping;
	}

	return typeof mapping.keyword === 'function' ? mapping.keyword({ column }) : mapping.keyword;
};
65+
3966
const mapColumnData = userDefinedTypes => column => {
4067
return _.chain(column)
4168
.toPairs()
42-
.map(([key, value]) => [
43-
columnPropertiesMapper[key]?.keyword || columnPropertiesMapper[key],
44-
getColumnValue(column, key, value),
45-
])
69+
.map(([key, value]) => [getColumnKey({ column, key }), getColumnValue(column, key, value)])
4670
.filter(([key, value]) => key && !_.isNil(value))
4771
.fromPairs()
48-
.thru(setColumnType(userDefinedTypes))
72+
.thru(col => {
73+
const columnWithType = setColumnType(userDefinedTypes)(col);
74+
keysToExclude.forEach(key => delete columnWithType[key]);
75+
return columnWithType;
76+
})
4977
.value();
5078
};
5179

@@ -187,6 +215,12 @@ const mapType = (userDefinedTypes, type) => {
187215
case 'regrole':
188216
case 'regtype':
189217
return { type: 'oid', mode: type };
218+
case 'vector':
219+
return { type: 'vector', subtype: 'vector' };
220+
case 'halfvec':
221+
return { type: 'vector', subtype: 'halfvec' };
222+
case 'sparsevec':
223+
return { type: 'vector', subtype: 'sparsevec' };
190224

191225
default: {
192226
if (_.some(userDefinedTypes, { name: type })) {

reverse_engineering/helpers/queryConstants.js

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -94,19 +94,52 @@ const getGET_FUNCTIONS_WITH_PROCEDURES_ADDITIONAL = postgresVersion => {
9494
FROM pg_catalog.pg_proc WHERE pronamespace = $1;`;
9595
};
9696

97+
/**
 * Builds the query that lists the routines (functions and procedures) of the schema
 * passed as `$1`, optionally leaving out routines that belong to the given extensions.
 *
 * @param {object} [options]
 * @param {string[]} [options.extensionsToExclude=[]] - extension names whose routines are filtered out
 * @returns {string} SQL text with a single `$1` placeholder for the schema name
 */
const getGET_FUNCTIONS_WITH_PROCEDURES = ({ extensionsToExclude = [] } = {}) => {
	// Extension names end up inside SQL string literals, so embedded single quotes are doubled
	// to keep a name from terminating the literal.
	const extensionsStatement = extensionsToExclude.map(ext => `'${String(ext).replace(/'/g, "''")}'`).join(', ');

	// `IN ()` is a syntax error in PostgreSQL, so the exclusion filter is emitted only
	// when there is at least one extension to exclude.
	const extensionsFilter = extensionsStatement
		? `
			AND specific_name NOT IN (
				SELECT r.specific_name
				FROM information_schema.routines r
				JOIN pg_proc p
					ON r.routine_name = p.proname
					AND r.specific_schema = (
						SELECT n.nspname
						FROM pg_namespace n
						WHERE n.oid = p.pronamespace
					)
				JOIN pg_depend d
					ON d.objid = p.oid
				JOIN pg_extension e
					ON d.refobjid = e.oid
				WHERE e.extname IN (${extensionsStatement})
			)`
		: '';

	return `
		SELECT
			specific_name,
			routine_name AS name,
			routine_type,
			routine_definition,
			external_language,
			security_type,
			type_udt_name AS return_data_type
		FROM information_schema.routines
		WHERE specific_schema = $1${extensionsFilter};
	`;
};
129+
97130
const queryConstants = {
98131
PING: 'SELECT schema_name FROM information_schema.schemata LIMIT 1;',
99132
GET_VERSION: 'SELECT version()',
100133
GET_VERSION_AS_NUM: 'SHOW server_version_num;',
101134
GET_SCHEMA_NAMES: 'SELECT schema_name FROM information_schema.schemata;',
102135
GET_TABLE_NAMES: `
103136
SELECT tables.table_name, tables.table_type FROM information_schema.tables AS tables
104-
INNER JOIN
137+
INNER JOIN
105138
(SELECT
106139
pg_class.relname AS table_name,
107140
pg_namespace.nspname AS table_schema
108141
FROM pg_catalog.pg_class AS pg_class
109-
INNER JOIN pg_catalog.pg_namespace AS pg_namespace
142+
INNER JOIN pg_catalog.pg_namespace AS pg_namespace
110143
ON (pg_namespace.oid = pg_class.relnamespace)
111144
WHERE pg_class.relispartition = false
112145
AND pg_class.relkind = ANY('{"r","v","t","m","p"}'))
@@ -120,8 +153,8 @@ const queryConstants = {
120153
GET_NAMESPACE_OID: 'SELECT oid FROM pg_catalog.pg_namespace WHERE nspname = $1',
121154
GET_TABLE_LEVEL_DATA: `
122155
SELECT pc.oid, pc.relpersistence, pc.reloptions, pt.spcname, pg_get_expr(pc.relpartbound, pc.oid) AS partition_expr
123-
FROM pg_catalog.pg_class AS pc
124-
LEFT JOIN pg_catalog.pg_tablespace AS pt
156+
FROM pg_catalog.pg_class AS pc
157+
LEFT JOIN pg_catalog.pg_tablespace AS pt
125158
ON pc.reltablespace = pt.oid
126159
WHERE pc.relname = $1 AND pc.relnamespace = $2;`,
127160
GET_TABLE_TOAST_OPTIONS: `
@@ -155,15 +188,15 @@ const queryConstants = {
155188
GET_ROWS_COUNT: fullTableName => `SELECT COUNT(*) AS quantity FROM ${fullTableName};`,
156189
GET_SAMPLED_DATA: (fullTableName, jsonColumns) => `SELECT ${jsonColumns} FROM ${fullTableName} LIMIT $1;`,
157190
GET_SAMPLED_DATA_SIZE: (fullTableName, jsonColumns) => `
158-
SELECT sum(pg_column_size(_hackolade_tmp_sampling_tbl.*)) AS _hackolade_tmp_sampling_tbl_size
191+
SELECT sum(pg_column_size(_hackolade_tmp_sampling_tbl.*)) AS _hackolade_tmp_sampling_tbl_size
159192
FROM (SELECT ${jsonColumns} FROM ${fullTableName} LIMIT $1) AS _hackolade_tmp_sampling_tbl;`,
160193
GET_INHERITS_PARENT_TABLE_NAME: `
161194
SELECT pc.relname AS parent_table_name FROM pg_catalog.pg_inherits AS pi
162195
INNER JOIN pg_catalog.pg_class AS pc
163196
ON pc.oid = pi.inhparent
164197
WHERE pi.inhrelid = $1;`,
165198
GET_TABLE_CONSTRAINTS: `
166-
SELECT pcon.conname AS constraint_name,
199+
SELECT pcon.conname AS constraint_name,
167200
pcon.contype AS constraint_type,
168201
pcon.connoinherit AS no_inherit,
169202
pcon.conkey AS constraint_keys,
@@ -181,14 +214,14 @@ const queryConstants = {
181214
GET_TABLE_INDEXES_V_10: getGET_TABLE_INDEXES(10),
182215
GET_TABLE_INDEXES_V_15: getGET_TABLE_INDEXES(15),
183216
GET_TABLE_FOREIGN_KEYS: `
184-
SELECT pcon.conname AS relationship_name,
217+
SELECT pcon.conname AS relationship_name,
185218
pcon.conkey AS table_columns_positions,
186219
pcon.confdeltype AS relationship_on_delete,
187220
pcon.confupdtype AS relationship_on_update,
188221
pcon.confmatchtype AS relationship_match,
189-
pc_foreign_table.relname AS foreign_table_name,
222+
pc_foreign_table.relname AS foreign_table_name,
190223
ARRAY(
191-
SELECT column_name::text FROM unnest(pcon.confkey) AS column_position
224+
SELECT column_name::text FROM unnest(pcon.confkey) AS column_position
192225
JOIN information_schema.columns ON (ordinal_position = column_position)
193226
WHERE table_name = pc_foreign_table.relname AND table_schema = foreign_table_namespace.nspname)::text[] AS foreign_columns,
194227
foreign_table_namespace.nspname AS foreign_table_schema
@@ -201,22 +234,13 @@ const queryConstants = {
201234
GET_VIEW_DATA: `SELECT * FROM information_schema.views WHERE table_name = $1 AND table_schema = $2;`,
202235
GET_VIEW_SELECT_STMT_FALLBACK: `SELECT definition FROM pg_views WHERE viewname = $1 AND schemaname = $2;`,
203236
GET_VIEW_OPTIONS: `
204-
SELECT oid,
237+
SELECT oid,
205238
reloptions AS view_options,
206239
relpersistence AS persistence,
207240
obj_description(oid, 'pg_class') AS description
208-
FROM pg_catalog.pg_class
241+
FROM pg_catalog.pg_class
209242
WHERE relname = $1 AND relnamespace = $2;`,
210-
GET_FUNCTIONS_WITH_PROCEDURES: `
211-
SELECT specific_name,
212-
routine_name AS name,
213-
routine_type,
214-
routine_definition,
215-
external_language,
216-
security_type,
217-
type_udt_name AS return_data_type
218-
FROM information_schema.routines
219-
WHERE specific_schema=$1;`,
243+
GET_FUNCTIONS_WITH_PROCEDURES: getGET_FUNCTIONS_WITH_PROCEDURES({ extensionsToExclude: ['vector'] }),
220244
GET_FUNCTIONS_WITH_PROCEDURES_ARGS: `
221245
SELECT parameter_name,
222246
parameter_mode,
@@ -315,7 +339,7 @@ const queryConstants = {
315339
(c.relname)::information_schema.sql_identifier,
316340
em.num, ((t.tgtype)::integer & 1), ((t.tgtype)::integer & 66)
317341
ORDER BY t.tgname))::information_schema.cardinal_number AS action_order,
318-
(regexp_match(pg_get_triggerdef(t.oid), '.{35,} WHEN \((.+)\) EXECUTE FUNCTION'::text))[1]::information_schema.character_data AS action_condition,
342+
(regexp_match(pg_get_triggerdef(t.oid), '.{35,} WHEN ((.+)) EXECUTE FUNCTION'::text))[1]::information_schema.character_data AS action_condition,
319343
(SUBSTRING(pg_get_triggerdef(t.oid)
320344
FROM (POSITION(('EXECUTE FUNCTION'::text) IN (SUBSTRING(pg_get_triggerdef(t.oid)
321345
FROM 48))) + 47)))::information_schema.character_data AS action_statement,
@@ -363,7 +387,7 @@ const queryConstants = {
363387
pg_class_referenced.relname AS referenced_table_name,
364388
pg_namespace_referenced.nspname AS referenced_table_schema
365389
FROM pg_catalog.pg_trigger as pg_trigger
366-
LEFT JOIN pg_catalog.pg_proc AS pg_proc
390+
LEFT JOIN pg_catalog.pg_proc AS pg_proc
367391
ON (pg_trigger.tgfoid = pg_proc.oid)
368392
LEFT JOIN pg_catalog.pg_attribute AS pg_attribute
369393
ON (pg_attribute.attnum = ANY(pg_trigger.tgattr::int2[]) AND pg_trigger.tgrelid = pg_attribute.attrelid)
@@ -376,7 +400,7 @@ const queryConstants = {
376400
LEFT JOIN pg_catalog.pg_namespace AS pg_namespace_referenced
377401
ON(pg_namespace_referenced.oid = pg_class_referenced.relnamespace)
378402
WHERE pg_class.relnamespace = $1 AND pg_class.oid = $2
379-
GROUP BY
403+
GROUP BY
380404
trigger_name,
381405
function_name,
382406
"constraint",

0 commit comments

Comments
 (0)