|
35 | 35 | from bayeslite.util import json_dumps |
36 | 36 |
|
37 | 37 | import cgpm_schema.parse |
| 38 | +import cgpm_analyze.parse |
38 | 39 |
|
39 | 40 | CGPM_SCHEMA_1 = ''' |
40 | 41 | INSERT INTO bayesdb_metamodel (name, version) VALUES ('cgpm', 1); |
@@ -190,7 +191,7 @@ def initialize_models(self, bdb, generator_id, modelnos): |
190 | 191 | for cgpm_ext in schema['cgpm_composition']: |
191 | 192 | cgpms = [self._initialize_cgpm(bdb, generator_id, cgpm_ext) |
192 | 193 | for _ in xrange(n)] |
193 | | - engine.compose_cgpm(cgpms, N=1, multithread=self._ncpu) |
| 194 | + engine.compose_cgpm(cgpms, multithread=self._ncpu) |
194 | 195 |
|
195 | 196 | # Store the newly initialized engine. |
196 | 197 | engine_json = json_dumps(engine.to_metadata()) |
@@ -221,16 +222,46 @@ def analyze_models(self, bdb, generator_id, modelnos=None, iterations=1, |
221 | 222 |
|
222 | 223 | if ckpt_iterations is not None or ckpt_seconds is not None: |
223 | 224 | # XXX |
224 | | - raise NotImplementedError('cgpm analysis checkpoint') |
225 | | - if program is not None: |
226 | | - # XXX |
227 | | - raise NotImplementedError('cgpm analysis programs') |
| 225 | + raise NotImplementedError('CGpm analysis checkpoint not supported.') |
228 | 226 |
|
229 | | - # Get the engine. |
230 | | - engine = self._engine(bdb, generator_id) |
| 227 | + if program is None: |
| 228 | + program = [] |
| 229 | + |
| 230 | + population_id = core.bayesdb_generator_population(bdb, generator_id) |
| 231 | + |
def retrieve_analyze_variables(ast):
    """Return the variable names targeted by a parsed ANALYZE program.

    `ast` is the sequence of clauses parsed from the program:

    - no clauses: the names given by `core.bayesdb_variable_names` for
      the population (transition everything by default);
    - one `Variables` clause: exactly `clause.vars`;
    - one `Skip` clause: the population's variable names, minus those
      listed in `clause.vars`.

    Raises ValueError if the single clause is of an unrecognized type,
    or if more than one clause is given.
    """
    # Exactly 0 or 1 clause is supported; reject anything longer up front.
    if len(ast) > 1:
        # Wrap in a 1-tuple: a bare tuple on the right of `%` would be
        # unpacked as the argument list and raise TypeError instead.
        raise ValueError('1 clause permitted in ANALYZE: %s.' % (ast,))

    # Transition all variables by default.
    if len(ast) == 0:
        return core.bayesdb_variable_names(bdb, population_id)

    clause = ast[0]

    # Transition user specified variables only.
    if isinstance(clause, cgpm_analyze.parse.Variables):
        return clause.vars

    # Transition all variables except user specified skip.
    if isinstance(clause, cgpm_analyze.parse.Skip):
        # A list comprehension (rather than `filter`) always yields a
        # concrete list, regardless of interpreter version.
        return [
            v for v in core.bayesdb_variable_names(bdb, population_id)
            if v not in clause.vars
        ]

    # Unknown/impossible clause.
    raise ValueError('Unknown clause in ANALYZE: %s.' % (ast,))
231 | 254 |
|
232 | | - # Do the transition. |
233 | | - engine.transition(N=iterations, S=max_seconds, multithread=self._ncpu) |
| 255 | + # Retrieve target variables. |
| 256 | + analyze_ast = cgpm_analyze.parse.parse(program) |
| 257 | + variables = retrieve_analyze_variables(analyze_ast) |
| 258 | + varnos = [core.bayesdb_variable_number(bdb, population_id, v) |
| 259 | + for v in variables] |
| 260 | + |
| 261 | + # Run transition. |
| 262 | + engine = self._engine(bdb, generator_id) |
| 263 | + engine.transition( |
| 264 | + N=iterations, S=max_seconds, cols=varnos, multithread=self._ncpu) |
234 | 265 |
|
235 | 266 | # Serialize the engine. |
236 | 267 | engine_json = json_dumps(engine.to_metadata()) |
@@ -323,7 +354,7 @@ def simulate_joint(self, bdb, generator_id, targets, constraints, modelno, |
323 | 354 | cgpm_query = [colno for _r, colno in targets] |
324 | 355 | cgpm_evidence = { |
325 | 356 | colno: self._to_numeric(bdb, generator_id, colno, value) |
326 | | - for colno, value in constraints |
| 357 | + for _r, colno, value in constraints |
327 | 358 | } |
328 | 359 | engine = self._engine(bdb, generator_id) |
329 | 360 | samples = engine.simulate( |
@@ -586,6 +617,7 @@ def _create_schema(bdb, generator_id, schema_ast): |
586 | 617 | # State. |
587 | 618 | variables = [] |
588 | 619 | categoricals = {} |
| 620 | + declared_latents = [] |
589 | 621 | cgpm_composition = [] |
590 | 622 | modelled = set() |
591 | 623 | subsample = None |
@@ -634,6 +666,18 @@ def _retrieve_stattype_dist_params(var): |
634 | 666 | variables.append([var, stattype, dist, params]) |
635 | 667 | modelled.add(var) |
636 | 668 |
|
| 669 | + elif isinstance(clause, cgpm_schema.parse.Latent): |
| 670 | + # Reject if the latent variable has already been declared. |
| 671 | + if any(l[0] == clause.name for l in declared_latents): |
| 672 | + duplicate.add(clause.name) |
| 673 | + |
| 674 | + # XXX FILL ME XXX |
| 675 | + |
| 676 | + # Register the latent variable name and stattype into bayesdb. |
| 677 | + # Do something related to the error checking data structures. |
| 678 | + |
| 679 | + declared_latents.append((clause.name, clause.stattype)) |
| 680 | + |
637 | 681 | elif isinstance(clause, cgpm_schema.parse.Foreign): |
638 | 682 | # Foreign model: some set of output variables is to be |
639 | 683 | # modelled by foreign logic, possibly conditional on some |
@@ -677,22 +721,21 @@ def _retrieve_stattype_dist_params(var): |
677 | 721 | _, dist, params = _retrieve_stattype_dist_params(var) |
678 | 722 | cctypes.append(dist) |
679 | 723 | ccargs.append(params) |
680 | | - else: |
681 | | - # Finally, add a cgpm_composition record. |
682 | | - cgpm_composition.append({ |
683 | | - 'name': name, |
684 | | - 'inputs': inputs, |
685 | | - 'outputs': outputs, |
686 | | - 'kwds': kwds, |
687 | | - }) |
| 724 | + # Finally, add a cgpm_composition record. |
| 725 | + cgpm_composition.append({ |
| 726 | + 'name': name, |
| 727 | + 'inputs': inputs, |
| 728 | + 'outputs': outputs, |
| 729 | + 'kwds': kwds, |
| 730 | + }) |
688 | 731 |
|
689 | 732 | elif isinstance(clause, cgpm_schema.parse.Subsample): |
690 | 733 | if subsample is not None: |
691 | 734 | raise BQLError(bdb, 'Duplicate subsample: %r' % (clause.n,)) |
692 | 735 | subsample = clause.n |
693 | 736 |
|
694 | 737 | else: |
695 | | - assert False |
| 738 | + raise BQLError(bdb, 'Unknown clause: %r' % (clause,)) |
696 | 739 |
|
697 | 740 | # Raise an exception if there were duplicates or unknown |
698 | 741 | # variables. |
|
0 commit comments