Skip to content

Commit 51b6f91

Browse files
author
F Saad
committed
Implement LATENT specification in cgpm_metamodel grammar, parser, and stub in _create_schema. Progress #435.
1 parent d9ddbc1 commit 51b6f91

File tree

4 files changed

+35
-1
lines changed

4 files changed

+35
-1
lines changed

src/metamodels/cgpm_metamodel.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,7 @@ def _create_schema(bdb, generator_id, schema_ast):
648648
# State.
649649
variables = []
650650
categoricals = {}
651+
declared_latents = []
651652
cgpm_composition = []
652653
modelled = set()
653654
subsample = None
@@ -696,6 +697,18 @@ def _retrieve_stattype_dist_params(var):
696697
variables.append([var, stattype, dist, params])
697698
modelled.add(var)
698699

700+
elif isinstance(clause, cgpm_schema.parse.Latent):
701+
# Reject if the latent variable has already been declared.
702+
if any(l[0] == clause.name for l in declared_latents):
703+
duplicate.add(clause.name)
704+
705+
# XXX FILL ME XXX
706+
707+
# Register the latent variable name and stattype into bayesdb.
708+
# Do something related to the error checking data structures.
709+
710+
declared_latents.append((clause.name, clause.stattype))
711+
699712
elif isinstance(clause, cgpm_schema.parse.Foreign):
700713
# Foreign model: some set of output variables is to be
701714
# modelled by foreign logic, possibly conditional on some
@@ -753,7 +766,7 @@ def _retrieve_stattype_dist_params(var):
753766
subsample = clause.n
754767

755768
else:
756-
assert False
769+
raise BQLError(bdb, 'Unknown clause: %r' % (clause,))
757770

758771
# Raise an exception if there were duplicates or unknown
759772
# variables.

src/metamodels/cgpm_schema/grammar.y

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ clause(basic) ::= var(var) dist(dist) param_opt(params).
3434
clause(foreign) ::= K_MODEL vars(outputs) given_opt(inputs)
3535
K_USING foreign(name) param_opt(params).
3636
clause(subsamp) ::= K_SUBSAMPLE L_NUMBER(n).
37+
clause(latent) ::= K_LATENT var(var) stattype(st).
3738

3839
dist(name) ::= L_NAME(dist).
3940
foreign(name) ::= L_NAME(foreign).
@@ -45,6 +46,8 @@ vars(one) ::= var(var).
4546
vars(many) ::= vars(vars) T_COMMA var(var).
4647
var(name) ::= L_NAME(var).
4748

49+
stattype(s) ::= L_NAME(st).
50+
4851
param_opt(none) ::= .
4952
param_opt(some) ::= T_LROUND params(ps) T_RROUND.
5053
params(one) ::= param(param).

src/metamodels/cgpm_schema/parse.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
KEYWORDS = {
3333
'given': grammar.K_GIVEN,
34+
'latent': grammar.K_LATENT,
3435
'model': grammar.K_MODEL,
3536
'subsample': grammar.K_SUBSAMPLE,
3637
'using': grammar.K_USING,
@@ -132,6 +133,8 @@ def p_clause_foreign(self, outputs, inputs, name, params):
132133
return Foreign(outputs, inputs, name, params)
133134
def p_clause_subsamp(self, n):
134135
return Subsample(n)
136+
def p_clause_latent(self, var, st):
137+
return Latent(var, st)
135138

136139
def p_dist_name(self, dist): return casefold(dist)
137140
def p_foreign_name(self, foreign): return casefold(foreign)
@@ -143,6 +146,8 @@ def p_vars_one(self, var): return [var]
143146
def p_vars_many(self, vars, var): vars.append(var); return vars
144147
def p_var_name(self, var): return var
145148

149+
def p_stattype_s(self, st): return st
150+
146151
def p_param_opt_none(self): return []
147152
def p_param_opt_some(self, ps): return ps
148153
def p_params_one(self, param): return [param]
@@ -166,3 +171,8 @@ def p_param_nam(self, p, nam): return (p, nam)
166171
Subsample = namedtuple('Subsample', [
167172
'n',
168173
])
174+
175+
Latent = namedtuple('Latent', [
176+
'name',
177+
'stattype',
178+
])

tests/foobar.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@
152152
launch_mass NUMERICAL,
153153
perigee NUMERICAL,
154154
period NUMERICAL,
155+
155156
LATENT kepler_noise NUMERICAL,
156157
LATENT kepler_cluster_id CATEGORICAL
157158
)
@@ -172,11 +173,18 @@
172173
bdb.execute('''
173174
CREATE GENERATOR g0 FOR satellites USING cgpm (
174175
apogee NORMAL,
176+
177+
LATENT kepler_cluster_id NUMERICAL,
178+
LATENT kepler_noise NUMERICAL,
179+
175180
MODEL kepler_cluster_id, kepler_noise, period GIVEN apogee, perigee
176181
USING venturescript (source = "{}"),
182+
177183
MODEL perigee GIVEN apogee USING linreg,
184+
178185
MODEL class_of_orbit GIVEN apogee, period, perigee
179186
USING forest (k = 4),
187+
180188
SUBSAMPLE 100,
181189
)
182190
'''.format(kepler_source))

0 commit comments

Comments
 (0)