Skip to content

Commit 5480d2f

Browse files
Rahul Iyer; Feng, Xixuan (Aaron)
authored and committed
Bug fix: SVD works for flexible source tables
Pivotal Track: 61320838
Additional author: Feng, Xixuan (Aaron) <xfeng@gopivotal.com>
- Fixed a bug where SVD worked only for source tables with fixed column names; any other column names unexpectedly raised an error
- Install-check tests are revised to also test this fix
1 parent ebdd372 commit 5480d2f

File tree

4 files changed

+244
-213
lines changed

4 files changed

+244
-213
lines changed

src/ports/postgres/modules/linalg/svd.py_in

Lines changed: 39 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ def _validate_args(schema_madlib, source_table, output_table_prefix, k,
9494
"SVD error: {1} column does not exist in {0}!"
9595
.format(source_table, val_id))
9696

97+
98+
9799
if result_summary_table is not None:
98100
if not result_summary_table.strip():
99101
plpy.error("SVD error: Invalid result summary table name!")
@@ -110,9 +112,10 @@ def create_summary_table(**args):
110112

111113
plpy.execute(
112114
"""
113-
SELECT {schema_madlib}.matrix_mult('{matrix_u}', NULL, NULL, NULL, False,
114-
'{matrix_s}', 'row_id', 'col_id', 'value', False,
115-
'{temp_prefix}_a');
115+
SELECT {schema_madlib}.matrix_mult(
116+
'{matrix_u}', NULL, NULL, NULL, False,
117+
'{matrix_s}', 'row_id', 'col_id', 'value', False,
118+
'{temp_prefix}_a');
116119
""".format(**args))
117120
plpy.execute(
118121
"""
@@ -270,6 +273,11 @@ def svd_sparse(schema_madlib, source_table, output_table_prefix,
270273
# copy the input table to a temporary table since all matrix operation
271274
# modules require the dimensionality of the sparse matrix in the table
272275
# as a row: (row_dim, col_dim, NULL)
276+
# Further, this function also hard codes the name of the (row, col, val)
277+
# triple as ('row_id', 'col_id', 'value'). It is important to note this
278+
# for further calls.
279+
## FIXME: Hardcoded names are a poor design requirement for matrix ops
280+
## and should be changed
273281
converted_tbl = __unique_string()
274282
create_temp_sparse_matrix_table_with_dims(source_table,
275283
converted_tbl,
@@ -280,15 +288,14 @@ def svd_sparse(schema_madlib, source_table, output_table_prefix,
280288
x_dense = __unique_string() + "_1"
281289
plpy.execute("""
282290
SELECT {schema_madlib}.matrix_densify(
283-
'{source_table}', '{in_row}', '{in_col}', '{in_val}',
291+
'{source_table}', 'row_id', 'col_id', 'value',
284292
'{x_dense}', True)
285293
""".format(schema_madlib=schema_madlib, source_table=source_table,
286-
in_row=row_id, in_col=col_id, in_val=value,
287294
x_dense=x_dense))
288295

289296
# Call SVD for the dense matrix
290297
svd(schema_madlib, x_dense, output_table_prefix,
291-
row_id, k, nIterations, result_summary_table)
298+
'row_id', k, nIterations, result_summary_table)
292299

293300
if result_summary_table:
294301
t1 = time.time()
@@ -355,18 +362,15 @@ def svd_sparse_native(schema_madlib, source_table, output_table_prefix,
355362
""".format(converted_tbl=converted_tbl, row_dim=row_dim,
356363
col_dim=col_dim))
357364
source_table = converted_tbl
358-
359-
# [row_dim, col_dim] = __get_dims(source_table, row_id, col_id, val)
360-
if row_dim < col_dim:
361-
is_narrow_matrix = False
362-
else:
363-
is_narrow_matrix = True
365+
## NOTE: At this point source_table has hard-coded column names:
366+
## 'row_id' --- 'col_id' --- 'value'
364367

365368
t0 = time.time() # measure the starting time
366369
_svd(schema_madlib, source_table, output_table_prefix, k,
367-
nIterations, False, row_id, col_id, val)
370+
nIterations, False, 'row_id', 'col_id', 'value')
368371

369-
if not is_narrow_matrix:
372+
# [row_dim, col_dim] = __get_dims(source_table, row_id, col_id, val)
373+
if col_dim > row_dim:
370374
# Switch U and V
371375
tmp = __unique_string()
372376
plpy.execute("""
@@ -380,7 +384,7 @@ def svd_sparse_native(schema_madlib, source_table, output_table_prefix,
380384
src_dense = __unique_string()
381385
plpy.execute("""
382386
select {schema_madlib}.matrix_densify(
383-
'{source_table}', '{row_id}', '{col_id}', '{val}', '{src_dense}', True)
387+
'{source_table}', 'row_id', 'col_id', 'value', '{src_dense}', True)
384388
""".format(source_table=source_table, row_id=row_id, col_id=col_id,
385389
val=val, src_dense=src_dense, schema_madlib=schema_madlib))
386390
arguments = {'schema_madlib': schema_madlib,
@@ -583,7 +587,10 @@ def _svd(schema_madlib, source_table, output_table_prefix,
583587
i > 0
584588
""".format(svd_bidiagonal=svd_bidiagonal))[0]['nz']
585589
if k > non_zero_svals:
586-
plpy.warning("k is set to the number of non-zero singular values")
590+
plpy.warning("Value of 'k' is greater than the number of non-zero "
591+
"singular values. Outputing only {nz} instead of {k} "
592+
"eigen values/vectors".format(nz=non_zero_svals, k=k)
593+
)
587594
k = non_zero_svals
588595

589596
# Compute the singular values and output to sparse table
@@ -698,7 +705,6 @@ def _lanczos_bidiagonalize_create_pq_table(schema_madlib, pq_table_prefix, col_d
698705
SELECT 1, 0, {schema_madlib}.__svd_unit_vector({col_dim})
699706
""".format(schema_madlib=schema_madlib,
700707
pq_table_prefix=pq_table_prefix, col_dim=col_dim))
701-
702708
# ------------------------------------------------------------------------
703709

704710

@@ -1110,22 +1116,22 @@ def svd_help_message(schema_madlib, message, **kwargs):
11101116

11111117
-- example input data
11121118
COPY mat (row_id, row_vec) FROM stdin;
1113-
1 {691,58,899,163,159,533,604,582,269,390}
1114-
0 {396,840,353,446,318,886,15,584,159,383}
1115-
3 {462,532,787,265,982,306,600,608,212,885}
1116-
2 {293,742,298,75,404,857,941,662,846,2}
1117-
5 {327,946,368,943,7,516,272,24,591,204}
1118-
4 {304,151,337,387,643,753,603,531,459,652}
1119-
7 {458,959,774,376,228,354,300,669,718,565}
1120-
6 {877,59,260,302,891,498,710,286,864,675}
1121-
9 {882,761,398,688,761,405,125,484,222,873}
1122-
8 {824,390,818,844,180,943,424,520,65,913}
1123-
11 {492,220,576,289,321,261,173,1,44,241}
1124-
10 {528,1,860,18,814,242,314,965,935,809}
1125-
13 {350,192,211,633,53,783,30,444,176,932}
1126-
12 {415,701,221,503,67,393,479,218,219,916}
1127-
15 {739,651,678,577,273,935,661,47,373,618}
1128-
14 {909,472,871,695,930,455,398,893,693,838}
1119+
1 {{691,58,899,163,159,533,604,582,269,390}}
1120+
0 {{396,840,353,446,318,886,15,584,159,383}}
1121+
3 {{462,532,787,265,982,306,600,608,212,885}}
1122+
2 {{293,742,298,75,404,857,941,662,846,2}}
1123+
5 {{327,946,368,943,7,516,272,24,591,204}}
1124+
4 {{304,151,337,387,643,753,603,531,459,652}}
1125+
7 {{458,959,774,376,228,354,300,669,718,565}}
1126+
6 {{877,59,260,302,891,498,710,286,864,675}}
1127+
9 {{882,761,398,688,761,405,125,484,222,873}}
1128+
8 {{824,390,818,844,180,943,424,520,65,913}}
1129+
11 {{492,220,576,289,321,261,173,1,44,241}}
1130+
10 {{528,1,860,18,814,242,314,965,935,809}}
1131+
13 {{350,192,211,633,53,783,30,444,176,932}}
1132+
12 {{415,701,221,503,67,393,479,218,219,916}}
1133+
15 {{739,651,678,577,273,935,661,47,373,618}}
1134+
14 {{909,472,871,695,930,455,398,893,693,838}}
11291135
\.
11301136

11311137
DROP TABLE if exists svd_u;

0 commit comments

Comments
 (0)