Migrate cgpm_analyze and cgpm_schema into their own subdirectories.

F Saad · F Saad · commit 025ef7272f5b · 2016-07-07T11:28:26.000-04:00
Adds __init__.py files in the two new packages.
Updates setup.py to include the two new packages and their grammar files.
Updates cgpm_metamodel to correctly import cgpm_schema.parse.
Updates cgpm_schema.parse and cgpm_analyze.parse to reference grammar.py.
diff --git a/setup.py b/setup.py
@@ -186,7 +186,8 @@ def run_tests(self):
 lemonade = 'external/lemonade/dist'
 grammars = [
     'src/grammar.y',
-    'src/metamodels/cgpm_grammar.y',
+    'src/metamodels/cgpm_analyze/grammar.y',
+    'src/metamodels/cgpm_schema/grammar.y',
 ]
 
 setup(
@@ -213,6 +214,8 @@ def run_tests(self):
     packages=[
         'bayeslite',
         'bayeslite.metamodels',
+        'bayeslite.metamodels.cgpm_schema',
+        'bayeslite.metamodels.cgpm_analyze',
         'bayeslite.plex',
         'bayeslite.shell',
         'bayeslite.weakprng',
diff --git a/src/metamodels/cgpm_analyze/__init__.py b/src/metamodels/cgpm_analyze/__init__.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+
+#   Copyright (c) 2010-2016, MIT Probabilistic Computing Project
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
diff --git a/src/metamodels/cgpm_analyze/parse.py b/src/metamodels/cgpm_analyze/parse.py
@@ -19,29 +19,29 @@
 from bayeslite.exception import BQLParseError
 from bayeslite.util import casefold
 
-import analyze_grammar
+import grammar
 
 '''
-grep -o 'K_[A-Z][A-Z0-9_]*' < analyze_grammar.y | sort -u | awk '
+grep -o 'K_[A-Z][A-Z0-9_]*' < grammar.y | sort -u | awk '
 {
     sub("^K_", "", $1);
-    printf("    '\''%s'\'': analyze_grammar.K_%s,\n", tolower($1), $1);
+    printf("    '\''%s'\'': grammar.K_%s,\n", tolower($1), $1);
 }'
 '''
 
 KEYWORDS = {
-    'skip': analyze_grammar.K_SKIP,
-    'variables': analyze_grammar.K_VARIABLES,
+    'skip': grammar.K_SKIP,
+    'variables': grammar.K_VARIABLES,
 }
 
 PUNCTUATION = {
-    ',': analyze_grammar.T_COMMA,
-    ';': analyze_grammar.T_SEMI,
+    ',': grammar.T_COMMA,
+    ';': grammar.T_SEMI,
 }
 
 def parse(tokens):
     semantics = CGpmAnalyzeSemantics()
-    parser = analyze_grammar.Parser(semantics)
+    parser = grammar.Parser(semantics)
     for token in tokenize(tokens):
         semantics.context.append(token)
         if len(semantics.context) > 10:
@@ -61,9 +61,9 @@ def tokenize(tokens):
             elif token in PUNCTUATION:
                 yield PUNCTUATION[token], token
             else:               # XXX check for alphanumeric/_
-                yield analyze_grammar.L_NAME, token
+                yield grammar.L_NAME, token
         elif isinstance(token, (int, float)):
-            yield analyze_grammar.L_NUMBER, token
+            yield grammar.L_NUMBER, token
         else:
             raise IOError('Invalid token: %r' % (token,))
     yield 0, ''                 # EOF
@@ -108,44 +108,3 @@ def p_column_name_n(self, name):            return name
 Skip = namedtuple('Skip', [
     'vars',
 ])
-
-if __name__ == '__main__':
-    tokens = [
-        'SKIPS', 'a', ',', 'b', ';',        # XXX Why does this not raise?
-        'VARIABLES', 'a', ',', 'b', ';',
-        'SKIP', 'a', ';'
-    ]
-    print parse(tokens)
-    # [None, Variables(vars=['a', 'b']), Skip(vars=['a'])]
-
-    tokens = [
-        'VARIABLES', 'a', ',', 'b', ';',
-        'SKIP', 'a',    ';',
-        'SKIPS', 'a', ',', 'b', ';'
-    ]
-    print parse(tokens)
-    # [Variables(vars=['a', 'b']), Skip(vars=['a'])]
-
-    tokens = [
-        'VARIABLES', 'a', ',', 'b', ';',
-        'SKIP', 'a', ',', ';',
-        'SKIPS', 'a', ',', 'b', ';'
-    ]
-    print parse(tokens)
-    # [Variables(vars=['a', 'b']), Skip(vars=['a', 'SKIPS'])]
-
-    tokens = [
-        'VARIABLES', 'a', ',', 'b', ';',
-        'SKIP', 'a', ',', ';',
-        'SKIPS', ',', 'a', ',', 'b', ';'
-    ]
-    print parse(tokens)
-    # [Variables(vars=['a', 'b']), Skip(vars=['a', 'SKIPS', 'a', 'b'])]
-
-    tokens = [
-        'VARIABLES', 'a', ',', 'b', ';',
-        'SKIP', 'a', ',', ';',
-        'SKIP', 'a', ',', 'b', ';'
-    ]
-    print parse(tokens)
-    # [Variables(vars=['a', 'b']), Skip(vars=['a', 'a', 'b'])]
diff --git a/src/metamodels/cgpm_metamodel.py b/src/metamodels/cgpm_metamodel.py
@@ -65,7 +65,7 @@
 from bayeslite.stats import arithmetic_mean
 from bayeslite.util import casefold
 
-import cgpm_parse
+import cgpm_schema.parse
 
 CGPM_SCHEMA_1 = '''
 INSERT INTO bayesdb_metamodel (name, version) VALUES ('cgpm', 1);
@@ -119,7 +119,7 @@ def register(self, bdb):
                     ' with unknown schema version: %d' % (version,))
 
     def create_generator(self, bdb, generator_id, schema_tokens):
-        schema_ast = cgpm_parse.parse(schema_tokens)
+        schema_ast = cgpm_schema.parse.parse(schema_tokens)
         schema = _create_schema(bdb, generator_id, schema_ast)
 
         # Store the schema.
@@ -636,7 +636,7 @@ def _retrieve_stattype_dist_params(var):
     # Process each clause one by one.
     for clause in schema_ast:
 
-        if isinstance(clause, cgpm_parse.Basic):
+        if isinstance(clause, cgpm_schema.parse.Basic):
             # Basic Crosscat component model: one variable to be put
             # into Crosscat views.
             var = clause.var
@@ -665,7 +665,7 @@ def _retrieve_stattype_dist_params(var):
             variables.append([var, stattype, dist, params])
             modelled.add(var)
 
-        elif isinstance(clause, cgpm_parse.Foreign):
+        elif isinstance(clause, cgpm_schema.parse.Foreign):
             # Foreign model: some set of output variables is to be
             # modelled by foreign logic, possibly conditional on some
             # set of input variables.
@@ -717,7 +717,7 @@ def _retrieve_stattype_dist_params(var):
                         'kwds': kwds,
                     })
 
-        elif isinstance(clause, cgpm_parse.Subsample):
+        elif isinstance(clause, cgpm_schema.parse.Subsample):
             if subsample is not None:
                 raise BQLError(bdb, 'Duplicate subsample: %r' % (clause.n,))
             subsample = clause.n
diff --git a/src/metamodels/cgpm_schema/__init__.py b/src/metamodels/cgpm_schema/__init__.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+
+#   Copyright (c) 2010-2016, MIT Probabilistic Computing Project
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
diff --git a/src/metamodels/cgpm_schema/grammar.y b/src/metamodels/cgpm_schema/grammar.y
diff --git a/src/metamodels/cgpm_schema/parse.py b/src/metamodels/cgpm_schema/parse.py
@@ -19,33 +19,33 @@
 from bayeslite.exception import BQLParseError
 from bayeslite.util import casefold
 
-import cgpm_grammar
+import grammar
 
 '''
-grep -o 'K_[A-Z][A-Z0-9_]*' < cgpm_grammar.y | sort -u | awk '
+grep -o 'K_[A-Z][A-Z0-9_]*' < grammar.y | sort -u | awk '
 {
     sub("^K_", "", $1);
-    printf("    '\''%s'\'': cgpm_grammar.K_%s,\n", tolower($1), $1);
+    printf("    '\''%s'\'': grammar.K_%s,\n", tolower($1), $1);
 }'
 '''
 
 KEYWORDS = {
-    'given': cgpm_grammar.K_GIVEN,
-    'model': cgpm_grammar.K_MODEL,
-    'subsample': cgpm_grammar.K_SUBSAMPLE,
-    'using': cgpm_grammar.K_USING,
+    'given': grammar.K_GIVEN,
+    'model': grammar.K_MODEL,
+    'subsample': grammar.K_SUBSAMPLE,
+    'using': grammar.K_USING,
 }
 
 PUNCTUATION = {
-    '(': cgpm_grammar.T_LROUND,
-    ')': cgpm_grammar.T_RROUND,
-    ',': cgpm_grammar.T_COMMA,
-    '=': cgpm_grammar.T_EQ,
+    '(': grammar.T_LROUND,
+    ')': grammar.T_RROUND,
+    ',': grammar.T_COMMA,
+    '=': grammar.T_EQ,
 }
 
 def parse(tokenses):
     semantics = CGPM_Semantics()
-    parser = cgpm_grammar.Parser(semantics)
+    parser = grammar.Parser(semantics)
     for token in tokenize(tokenses):
         semantics.context.append(token)
         if len(semantics.context) > 10:
@@ -64,9 +64,9 @@ def tokenize(tokenses):
             elif token in PUNCTUATION:
                 yield PUNCTUATION[token], token
             else:               # XXX check for alphanumeric/_
-                yield cgpm_grammar.L_NAME, token
+                yield grammar.L_NAME, token
         elif isinstance(token, (int, float)):
-            yield cgpm_grammar.L_NUMBER, token
+            yield grammar.L_NUMBER, token
         else:
             raise IOError('Invalid token: %r' % (token,))
     yield 0, ''                 # EOF