Skip to content

Commit e9569e7

Browse files
author
F Saad
committed
Implement cgpm_metamodel.analyze to parse the cgpm_analyze grammar.
1 parent af5379c commit e9569e7

File tree

2 files changed

+91
-9
lines changed

2 files changed

+91
-9
lines changed

src/metamodels/cgpm_metamodel.py

Lines changed: 62 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
from bayeslite.util import casefold
6767

6868
import cgpm_schema.parse
69+
import cgpm_analyze.parse
6970

7071
CGPM_SCHEMA_1 = '''
7172
INSERT INTO bayesdb_metamodel (name, version) VALUES ('cgpm', 1);
@@ -221,7 +222,7 @@ def initialize_models(self, bdb, generator_id, modelnos):
221222
for cgpm_ext in schema['cgpm_composition']:
222223
cgpms = [self._initialize_cgpm(bdb, generator_id, cgpm_ext)
223224
for _ in xrange(n)]
224-
engine.compose_cgpm(cgpms, N=1, multithread=self._ncpu)
225+
engine.compose_cgpm(cgpms, multithread=self._ncpu)
225226

226227
# Store the newly initialized engine.
227228
engine_json = json_dumps(engine.to_metadata())
@@ -252,16 +253,70 @@ def analyze_models(self, bdb, generator_id, modelnos=None, iterations=1,
252253

253254
if ckpt_iterations is not None or ckpt_seconds is not None:
254255
# XXX
255-
raise NotImplementedError('cgpm analysis checkpoint')
256-
if program is not None:
257-
# XXX
258-
raise NotImplementedError('cgpm analysis programs')
256+
raise NotImplementedError('CGpm analysis checkpoint not supported.')
257+
258+
if program is None:
259+
program = []
260+
261+
population_id = core.bayesdb_generator_population(bdb, generator_id)
262+
263+
def retrieve_analyze_variables(ast):
264+
# Transition all variables by default.
265+
if len(ast) == 0:
266+
variables = core.bayesdb_variable_names(bdb, population_id)
267+
# Exactly 1 clause supported.
268+
elif len(ast) == 1:
269+
clause = ast[0]
270+
# Transition user specified variables only.
271+
if isinstance(clause, cgpm_analyze.parse.Variables):
272+
variables = clause.vars
273+
# Transition all variables except those the user asked to skip.
274+
elif isinstance(clause, cgpm_analyze.parse.Skip):
275+
variables = filter(
276+
lambda v: v not in clause.vars,
277+
core.bayesdb_variable_names(bdb, population_id))
278+
# Unknown/impossible clause.
279+
else:
280+
raise ValueError('Unknown clause in ANALYZE: %s.' % ast)
281+
# Crash if more than 1 clause.
282+
else:
283+
raise ValueError('1 clause permitted in ANALYZE: %s.' % ast)
284+
return variables
285+
286+
def foreign(varname):
287+
schema = self._schema(bdb, generator_id)
288+
return all(v[0]!=varname for v in schema['variables'])
289+
290+
# Retrieve target variables.
291+
analyze_ast = cgpm_analyze.parse.parse(program)
292+
variables = retrieve_analyze_variables(analyze_ast)
293+
varnames_gpmcc = [v for v in variables if not foreign(v)]
294+
varnames_foreign = [v for v in variables if foreign(v)]
259295

260296
# Get the engine.
261297
engine = self._engine(bdb, generator_id)
262298

263-
# Do the transition.
264-
engine.transition(N=iterations, S=max_seconds, multithread=self._ncpu)
299+
# Transition gpmcc variables.
300+
if varnames_gpmcc:
301+
print varnames_gpmcc
302+
varnos_gpmcc = [
303+
core.bayesdb_variable_number(bdb, population_id, v)
304+
for v in varnames_gpmcc
305+
]
306+
engine.transition(
307+
N=iterations, S=max_seconds, cols=varnos_gpmcc,
308+
multithread=self._ncpu)
309+
310+
# Transition foreign variables.
311+
if varnames_foreign:
312+
print varnames_foreign
313+
varnos_foreign = [
314+
core.bayesdb_variable_number(bdb, population_id, v)
315+
for v in varnames_foreign
316+
]
317+
engine.transition_foreign(
318+
N=iterations, S=max_seconds, cols=varnos_foreign,
319+
multithread=self._ncpu)
265320

266321
# Serialize the engine.
267322
engine_json = json_dumps(engine.to_metadata())

tests/foobar.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
import numpy as np
1919
import random # XXX
2020

21+
import pytest
22+
2123
#from cgpm.regressions.forest import RandomForest
2224
from cgpm.regressions.forest import RandomForest
2325
from cgpm.regressions.linreg import LinearRegression
@@ -183,8 +185,33 @@
183185
print 'INITIALIZING'
184186
bdb.execute('INITIALIZE 1 MODELS FOR g0')
185187

186-
print 'ANALYZING'
187-
bdb.execute('ANALYZE g0 FOR 1 ITERATION WAIT')
188+
print 'ANALYZING EVERYONE'
189+
bdb.execute("""
190+
ANALYZE g0 FOR 1 ITERATION WAIT(
191+
;
192+
);""")
193+
194+
print 'ANALYZING ONLY SOME'
195+
bdb.execute("""
196+
ANALYZE g0 FOR 1 ITERATION WAIT(
197+
VARIABLES kepler_cluster_id;
198+
);""")
199+
200+
# Analyze all but kepler_cluster_id, kepler_noise, and period.
201+
print 'ANALYZING SKIPPING SOME'
202+
bdb.execute("""
203+
ANALYZE g0 FOR 1 ITERATION WAIT(
204+
SKIP kepler_cluster_id, kepler_noise, period;
205+
);""")
206+
207+
# Disallow combining SKIP and VARIABLES clauses in a single ANALYZE program.
208+
with pytest.raises(Exception):
209+
bdb.execute("""
210+
ANALYZE g0 FOR 1 ITERATION WAIT(
211+
SKIP kepler_cluster_id;
212+
VARIABLES apogee, perigee;
213+
);""")
214+
188215

189216
print 'DEP PROB'
190217
print bdb.execute('''

0 commit comments

Comments
 (0)