Skip to content

Commit 1b8d22c

Browse files
Associate latent variables with generators.
Add bayesdb_add_latent, which a metamodel's create_generator operation calls to register a latent variable of the generator it is creating. Currently, two generators may not use the same latent variable name. (XXX It is not clear that this is the right design choice. We could instead require the variable to be qualified by the generator's name when it is ambiguous. But we'll defer that for now.)
1 parent e810543 commit 1b8d22c

File tree

11 files changed

+34
-38
lines changed

11 files changed

+34
-38
lines changed

src/ast.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@
7878
'name',
7979
])
8080
PopVar = namedtuple('PopVar', [
81-
'latent',
8281
'name',
8382
'stattype',
8483
])

src/bql.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,6 @@ def _create_population(bdb, phrase):
532532
duplicates = set()
533533
missing = set()
534534
invalid = set()
535-
nlatent = 0
536535
colno_sql = '''
537536
SELECT colno FROM bayesdb_column
538537
WHERE tabname = :table AND name = :column_name
@@ -541,16 +540,11 @@ def _create_population(bdb, phrase):
541540
SELECT COUNT(*) FROM bayesdb_stattype WHERE name = :stattype
542541
'''
543542
for variable in phrase.schema:
544-
latent = variable.latent
545543
name = casefold(variable.name)
546544
stattype = casefold(variable.stattype)
547545
if name in variable_map:
548546
duplicates.add(name)
549547
continue
550-
if latent:
551-
nlatent += 1
552-
variable_map[name] = -nlatent
553-
continue
554548
cursor = bdb.sql_execute(colno_sql, {
555549
'table': phrase.table,
556550
'column_name': name,

src/core.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,23 @@ def bayesdb_variable_stattype(bdb, population_id, colno):
278278
assert len(row) == 1
279279
return row[0]
280280

281+
def bayesdb_add_latent(bdb, population_id, generator_id, var, stattype):
    """Add a generator's latent variable to a population.

    NOTE: To be used ONLY by a metamodel's create_generator method
    when establishing any latent variables of that generator.

    The variable is stored in `bayesdb_variable` under a negative
    column number: one less than the smallest column number already
    recorded for the population, and never greater than -1.  The row
    is tagged with `generator_id` so that the latent variable is
    associated with the generator that introduced it.

    :param bdb: database handle providing `savepoint` and `sql_execute`
    :param population_id: id of the population receiving the variable
    :param generator_id: id of the generator that owns the variable
    :param var: name of the latent variable
    :param stattype: name of the variable's statistical type
    :return: the (negative) column number assigned to the variable
    """
    # The lookup and the insert share one savepoint so that the chosen
    # column number cannot be left half-applied if the insert fails.
    with bdb.savepoint():
        cursor = bdb.sql_execute('''
            SELECT MIN(colno) FROM bayesdb_variable WHERE population_id = ?
        ''', (population_id,))
        min_colno = cursor_value(cursor)
        if min_colno is None:
            # MIN() over zero rows is NULL: the population has no
            # variables recorded yet, so start the latents at -1.
            # (Presumes cursor_value passes NULL through as None.)
            colno = -1
        else:
            # Manifest variables have nonnegative column numbers, so
            # clamp to -1 for the first latent; otherwise go one below
            # the most negative latent so far.
            colno = min(-1, min_colno - 1)
        bdb.sql_execute('''
            INSERT INTO bayesdb_variable
                (population_id, generator_id, colno, name, stattype)
                VALUES (?, ?, ?, ?, ?)
        ''', (population_id, generator_id, colno, var, stattype))
    return colno
297+
281298
def bayesdb_population_cell_value(bdb, population_id, rowid, colno):
282299
if colno < 0:
283300
# Latent variables do not appear in the table.

src/grammar.y

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ command(drop_pop) ::= K_DROP K_POPULATION ifexists(ifexists)
7272
pop_schema(one) ::= pop_var(var).
7373
pop_schema(many) ::= pop_schema(schema) T_COMMA pop_var(var).
7474
pop_var(manifest) ::= column_name(name) stattype(st).
75-
pop_var(latent) ::= K_LATENT column_name(name) stattype(st).
7675
stattype(st) ::= L_NAME(name).
7776

7877
/* XXX Temporary generators? */

src/parse.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,10 +182,7 @@ def p_command_drop_pop(self, ifexists, name):
182182

183183
def p_pop_schema_one(self, var): return [var]
184184
def p_pop_schema_many(self, schema, var): schema.append(var); return schema
185-
def p_pop_var_manifest(self, name, st):
186-
return ast.PopVar(False, name, st)
187-
def p_pop_var_latent(self, name, st):
188-
return ast.PopVar(True, name, st)
185+
def p_pop_var_manifest(self, name, st): return ast.PopVar(name, st)
189186
def p_stattype_st(self, name): return name
190187

191188
def p_command_creategen(self, ifnotexists0, name, ifnotexists1, pop,

src/schema.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,12 +131,15 @@
131131
132132
CREATE TABLE bayesdb_variable (
133133
population_id INTEGER NOT NULL REFERENCES bayesdb_population(id),
134+
generator_id INTEGER REFERENCES bayesdb_generator(id),
134135
colno INTEGER NOT NULL,
135-
name TEXT COLLATE NOCASE NOT NULL,
136+
name TEXT COLLATE NOCASE NOT NULL,
136137
stattype TEXT COLLATE NOCASE NOT NULL
137138
REFERENCES bayesdb_stattype(name),
138139
PRIMARY KEY(population_id, colno),
139-
UNIQUE(population_id, name)
140+
UNIQUE(population_id, name),
141+
UNIQUE(generator_id, colno),
142+
UNIQUE(generator_id, name)
140143
);
141144
142145
INSERT INTO bayesdb_variable (population_id, name, colno, stattype)

tests/test_bql.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,6 @@ def test_trivial_population():
101101
# XXX if (not) exists
102102
bdb.execute('create population p for t(age numerical)')
103103
bdb.execute('drop population p')
104-
bdb.execute('''
105-
create population p for t (
106-
age numerical,
107-
latent cluster categorical,
108-
latent error numerical
109-
)
110-
''')
111-
bdb.execute('drop population p')
112104

113105
@stochastic(max_runs=2, min_passes=1)
114106
def test_conditional_probability(seed):

tests/test_cgpm.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,7 @@ def test_cgpm():
139139
country_of_operator CATEGORICAL,
140140
launch_mass NUMERICAL,
141141
perigee NUMERICAL,
142-
period NUMERICAL,
143-
latent kepler_error NUMERICAL
142+
period NUMERICAL
144143
)
145144
''')
146145
bdb.execute('''

tests/test_guess.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,8 @@ def test_guess_population():
121121
# Population already exists.
122122
bayesdb_guess_population(bdb, 'p', 't')
123123
assert bdb.sql_execute('SELECT * FROM bayesdb_variable').fetchall() == [
124-
(1, 1, 'y', 'categorical'),
125-
(1, 2, 'z', 'numerical'),
124+
(1, None, 1, 'y', 'categorical'),
125+
(1, None, 2, 'z', 'numerical'),
126126
]
127127

128128
def isqrt(n):

tests/test_parse.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -492,18 +492,16 @@ def test_trivial_commands():
492492
launch_mass numerical,
493493
perigee numerical,
494494
apogee numerical,
495-
period numerical,
496-
latent kepler_error numerical
495+
period numerical
497496
)
498497
''') == \
499498
[ast.CreatePop(False, 'satellites', 'satellites_ucs', [
500-
ast.PopVar(False, 'country_of_operator', 'categorical'),
501-
ast.PopVar(False, 'orbit_type', 'categorical'),
502-
ast.PopVar(False, 'launch_mass', 'numerical'),
503-
ast.PopVar(False, 'perigee', 'numerical'),
504-
ast.PopVar(False, 'apogee', 'numerical'),
505-
ast.PopVar(False, 'period', 'numerical'),
506-
ast.PopVar(True, 'kepler_error', 'numerical'),
499+
ast.PopVar('country_of_operator', 'categorical'),
500+
ast.PopVar('orbit_type', 'categorical'),
501+
ast.PopVar('launch_mass', 'numerical'),
502+
ast.PopVar('perigee', 'numerical'),
503+
ast.PopVar('apogee', 'numerical'),
504+
ast.PopVar('period', 'numerical'),
507505
])]
508506
assert parse_bql_string('drop population satellites') == \
509507
[ast.DropPop(False, 'satellites')]

0 commit comments

Comments
 (0)