This repository was archived by the owner on Feb 6, 2025. It is now read-only.
Draft
Changes from 4 commits
3 changes: 2 additions & 1 deletion Pipfile
@@ -40,10 +40,11 @@ sphinx = ">=1.8,<2"

[packages] # Make sure to keep in sync with setup.py requirements.
ciso8601 = ">=2.1.3,<3"
dataclasses-json = ">=0.5.2,<0.6"
funcy = ">=1.7.3,<2"
graphql-core = ">=3.1.2,<3.2" # minor versions sometimes contain breaking changes
six = ">=1.10.0"
sqlalchemy = ">=1.3.0,<2"
sqlalchemy = ">=1.3.0,<1.4"

# The below is necessary to make a few pylint passes work properly, since pylint expects to be able
# to run "import graphql_compiler" in the environment in which it runs.
313 changes: 200 additions & 113 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion graphql_compiler/compiler/compiler_frontend.py
@@ -133,7 +133,7 @@
from .validation import validate_schema_and_query_ast


@dataclass(frozen=True)
@dataclass(init=True, repr=True, eq=False, frozen=True)
class OutputMetadata:
"""Metadata about a query's outputs."""

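The decorator change above presumably lets OutputMetadata keep its generated __init__ and __repr__ while defining its own equality: with eq=False, the dataclass machinery no longer synthesizes __eq__ or touches __hash__. A minimal sketch of that pattern, with invented fields rather than the real OutputMetadata definition:

```python
from dataclasses import dataclass


@dataclass(init=True, repr=True, eq=False, frozen=True)
class FrozenWithCustomEq:
    """Illustrative only: generated __init__/__repr__, hand-written __eq__ and __hash__."""

    name: str
    optional: bool

    def __eq__(self, other: object) -> bool:
        # eq=False means the dataclass does not generate __eq__, so this one is used.
        if not isinstance(other, FrozenWithCustomEq):
            return NotImplemented
        return self.name == other.name and self.optional == other.optional

    def __hash__(self) -> int:
        # Defining __eq__ sets __hash__ to None by default, so restore hashability explicitly.
        return hash((self.name, self.optional))
```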
1 change: 1 addition & 0 deletions graphql_compiler/query_planning_and_execution/__init__.py
@@ -0,0 +1 @@
# Copyright 2021-present Kensho Technologies, LLC.
@@ -1,7 +1,8 @@
# Copyright 2019-present Kensho Technologies, LLC.
from copy import copy
from dataclasses import dataclass
from typing import FrozenSet, List, Optional, Tuple, cast
from typing import FrozenSet, List, Optional, Tuple, Union, cast
from uuid import uuid4

from graphql import print_ast
from graphql.language.ast import (
@@ -17,11 +18,25 @@
StringValueNode,
)
from graphql.pyutils import FrozenList
from sqlalchemy.dialects.mssql.base import MSDialect

from ..ast_manipulation import get_only_query_definition
from ..compiler.common import (
CYPHER_LANGUAGE,
GREMLIN_LANGUAGE,
MATCH_LANGUAGE,
compile_graphql_to_cypher,
compile_graphql_to_gremlin,
compile_graphql_to_match,
compile_graphql_to_sql,
)
from ..compiler.sqlalchemy_extensions import print_sqlalchemy_query_string
from ..exceptions import GraphQLValidationError
from ..global_utils import QueryStringWithParameters
from ..schema import FilterDirective, OutputDirective
from .split_query import AstType, SubQueryNode
from ..schema.schema_info import CommonSchemaInfo, SQLAlchemySchemaInfo
from ..schema_transformation.split_query import AstType, SubQueryNode
from .typedefs import ProviderMetadata, QueryPlan, SimpleExecute


@dataclass
@@ -66,44 +81,65 @@ class QueryPlanDescriptor:
output_join_descriptors: List[OutputJoinDescriptor]


def make_query_plan(
root_sub_query_node: SubQueryNode, intermediate_output_names: FrozenSet[str]
) -> QueryPlanDescriptor:
"""Return a QueryPlanDescriptor, whose query ASTs have @filters added.

For each pair of parent and child SubQueryNodes, a new @filter directive will be added
in the child AST. It will be added on the field whose @output directive has the out_name
equal to the child's out name as specified in the QueryConnection. The newly added @filter
will be an 'in_collection' type filter, and the name of the local variable is guaranteed to
be the same as the out_name of the @output on the parent.

ASTs contained in the input node and its children nodes will not be modified.
def _make_simple_execute_node(
query_and_parameters: QueryStringWithParameters,
schema_info: Union[CommonSchemaInfo, SQLAlchemySchemaInfo],
provider_id: str,
backend_type: Optional[str],
Contributor: I'm not in love with the fact that this is Optional, but I don't have a concrete suggestion in mind. Maybe we can work together to figure something out?

Collaborator Author: I switched this to use the BackendType enum, so I think it makes sense for it to not be optional. It is still a bit weird: I currently check that if schema_info is of type SQLAlchemySchemaInfo, then backend_type is either MSSQL or PostgreSQL, but I don't check whether it matches the dialect in the schema info. If I add that check, we could run into the problem you mentioned ("a future SQLAlchemy version adds a new dialect for MSSQL that doesn't inherit from MSDialect — our code immediately becomes wrong in yet another place"), so I'm not sure what the best solution is...

) -> SimpleExecute:
"""Generate a SimpleExecuteNode.

Args:
root_sub_query_node: representing the base of a query split into pieces
that we want to turn into a query plan.
intermediate_output_names: names of outputs to be removed at the end.
query_and_parameters: the query and parameters for which to create a SimpleExecute.
schema_info: schema information to use for query compilation.
provider_id: the identifier of the provider to be queried.
backend_type: the backend type. Note: this is inferred from the SQLAlchemySchemaInfo's
dialect for SQL backends.

Returns:
QueryPlanDescriptor containing a tree of SubQueryPlans that wrap around each individual
query AST, the set of intermediate output names that are to be removed at the end, and
information on which outputs are to be connected to which in what manner.
"""
output_join_descriptors: List[OutputJoinDescriptor] = []
SimpleExecute with all necessary information to execute the given query_and_parameters.

root_sub_query_plan = SubQueryPlan(
query_ast=root_sub_query_node.query_ast,
schema_id=root_sub_query_node.schema_id,
parent_query_plan=None,
child_query_plans=[],
)

_make_query_plan_recursive(root_sub_query_node, root_sub_query_plan, output_join_descriptors)
"""
if isinstance(schema_info, SQLAlchemySchemaInfo):
# Compiled SQL query.
compilation_result = compile_graphql_to_sql(schema_info, query_and_parameters.query_string)
query = print_sqlalchemy_query_string(compilation_result.query, schema_info.dialect)

provider_metadata = ProviderMetadata(
backend_type=schema_info.dialect.name,
requires_fold_postprocessing=isinstance(schema_info.dialect, MSDialect),
Contributor: One of the reasons why I'd love backend_type to be non-optional and an enum is so that we can make this check stricter. Here's what I'm worried about: imagine a future SQLAlchemy version adds a new dialect for MSSQL that doesn't inherit from MSDialect — our code immediately becomes wrong in yet another place.

Another (and perhaps even better) alternative would be to add requires_fold_postprocessing, or something like it, to the SQLAlchemySchemaInfo. After all, that object knows the dialect (and therefore the emitted SQL) better than anything else, so realistically it should be the one specifying whether post-processing is required or not. From that perspective, ProviderMetadata would simply represent the parts of SQLAlchemySchemaInfo that the client may need to know in order to execute the query, which is appealing.

Collaborator Author: I like this idea! I will work on a separate PR to incorporate it.

)
else:
if backend_type == CYPHER_LANGUAGE:
compilation_result = compile_graphql_to_cypher(
schema_info, query_and_parameters.query_string
)
elif backend_type == GREMLIN_LANGUAGE:
compilation_result = compile_graphql_to_gremlin(
schema_info, query_and_parameters.query_string
)
elif backend_type == MATCH_LANGUAGE:
compilation_result = compile_graphql_to_match(
schema_info, query_and_parameters.query_string
)
# TODO(selene): add InterpreterAdapter based backends
else:
raise AssertionError(f"Unknown non-SQL backend_type {backend_type}.")
# Extract the query and create provider_metadata (process is the same across Cypher,
# Gremlin, and Match backends)
query = compilation_result.query
provider_metadata = ProviderMetadata(
backend_type=backend_type, requires_fold_postprocessing=False
)

return QueryPlanDescriptor(
root_sub_query_plan=root_sub_query_plan,
intermediate_output_names=intermediate_output_names,
output_join_descriptors=output_join_descriptors,
return SimpleExecute(
str(uuid4()),
provider_id,
provider_metadata,
query,
query_and_parameters.parameters,
compilation_result.output_metadata,
compilation_result.input_metadata,
)
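
The two review threads above converge on a possible follow-up: make backend_type a required enum, validate it against the SQLAlchemy dialect, and let the schema info itself declare whether MSSQL fold post-processing is needed. A rough sketch of that direction, assuming a BackendType enum and a requires_fold_postprocessing field that do not exist in this PR:

```python
from dataclasses import dataclass
from enum import Enum, unique


@unique
class BackendType(Enum):
    """Hypothetical backend enum; the exact set of members is an assumption."""

    cypher = "cypher"
    gremlin = "gremlin"
    match = "match"
    mssql = "mssql"
    postgresql = "postgresql"


@dataclass(frozen=True)
class SQLBackendCapabilities:
    """Toy stand-in for SQL-specific fields a future SQLAlchemySchemaInfo might expose."""

    dialect_name: str  # e.g. schema_info.dialect.name
    requires_fold_postprocessing: bool  # owned by the schema info, per the review suggestion


def validate_sql_backend(backend_type: BackendType, sql_info: SQLBackendCapabilities) -> None:
    """Raise if the declared backend type cannot describe the given SQL schema info."""
    if backend_type not in (BackendType.mssql, BackendType.postgresql):
        raise AssertionError(f"{backend_type} is not a SQL backend, but SQL schema info was given.")
    if backend_type.value != sql_info.dialect_name:
        raise AssertionError(
            f"backend_type {backend_type} does not match dialect {sql_info.dialect_name!r}."
        )
```

With a shape like this, ProviderMetadata could be populated straight from the schema info instead of checking isinstance(schema_info.dialect, MSDialect).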


@@ -291,6 +327,118 @@ def _get_in_collection_filter_directive(input_filter_name: str) -> DirectiveNode
)


def _get_plan_and_depth_in_dfs_order(query_plan: SubQueryPlan) -> List[Tuple[SubQueryPlan, int]]:
"""Return a list of topologically sorted (query plan, depth) tuples."""

def _get_plan_and_depth_in_dfs_order_helper(
query_plan: SubQueryPlan, depth: int
) -> List[Tuple[SubQueryPlan, int]]:
plan_and_depth_in_dfs_order = [(query_plan, depth)]
for child_query_plan in query_plan.child_query_plans:
plan_and_depth_in_dfs_order.extend(
_get_plan_and_depth_in_dfs_order_helper(child_query_plan, depth + 1)
)
return plan_and_depth_in_dfs_order

return _get_plan_and_depth_in_dfs_order_helper(query_plan, 0)


######
# Public API
######


def generate_query_plan(
query_and_parameters: QueryStringWithParameters,
provider_id: str,
*,
desired_page_size: Optional[int] = 5000,
Contributor: I'm kind of on the fence about setting a default here. The right number will depend on each user's statistics collection configuration — aside from some special cases, we can't generate pages that are more fine-grained than the statistics backing them. Within Kensho, we always know and control what that configuration might be. But in general, we might not.

On the flip side, not setting a number would make pagination disabled by default, and that's also a default value — and not a good one. Even if the statistics aren't fine-grained enough, the compiler will emit advisories when paginating, which should be an obvious and "soft" way to communicate the issue. And 5000 is a pretty decent compromise between large page sizes (for high throughput) and not blowing up the server or client.

What do you think?

Collaborator Author: I guess the alternative is that we make this a required param? Do you think it would be better to force the user to make a conscious decision here, since it does depend on user stats?

) -> QueryPlan:
"""Generate a query plan for query_and_parameters targeting data source 'provider_id'.

Note: this functionality is intended to replace make_query_plan below. TODO(selene)

Args:
query_and_parameters: the query and parameters for which to create a QueryPlan.
provider_id: the identifier of the provider to be queried.
desired_page_size: target page size. None indicates that pagination should be disabled.
The benefit of pagination with this sync executor is that:
- You can receive results as they arrive (see execute_query_plan)
- The load on the database is more controlled, and timeouts are
less likely.
The desired_page_size is fulfilled on a best-effort basis; setting
a particular desired_page_size does not guarantee pages of precisely
that size. Setting desired_page_size to a number less than 1000 is
not recommended, since the server is unlikely to have sufficiently
granular statistics to accomplish that, and the client is more likely
to fetch many entirely empty pages. Note: this is not currently used,
but will be in the near future! TODO(selene): implement ParallelPaginated

Returns:
QueryPlan for the given query_and_parameters against the data source specified
by the provider_id.

"""
# TODO(selene): look up schema_info and backend_type by provider_id
schema_info = None
backend_type = None
query_plan_node = _make_simple_execute_node(
query_and_parameters, schema_info, provider_id, backend_type
)

version = 1
return QueryPlan(
version,
provider_id,
query_and_parameters.query_string,
query_and_parameters.parameters,
desired_page_size,
query_plan_node.output_metadata,
query_plan_node,
)
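
For orientation, here is roughly how a caller might use generate_query_plan once the provider lookup TODO above is implemented. The provider id, page size, query, and the assumption that generate_query_plan is in scope are all illustrative, not part of this PR:

```python
from graphql_compiler.global_utils import QueryStringWithParameters

# Hypothetical query using the compiler's @output/@filter directives.
query_and_parameters = QueryStringWithParameters(
    """{
        Animal {
            name @output(out_name: "animal_name")
                 @filter(op_name: "=", value: ["$wanted_name"])
        }
    }""",
    {"wanted_name": "Hedwig"},
)

# "example_provider" is a placeholder id. desired_page_size=None would disable pagination;
# an explicit value asks for best-effort pages of roughly that size.
query_plan = generate_query_plan(
    query_and_parameters,
    "example_provider",
    desired_page_size=5000,
)
```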


def make_query_plan(
root_sub_query_node: SubQueryNode, intermediate_output_names: FrozenSet[str]
) -> QueryPlanDescriptor:
"""Return a QueryPlanDescriptor, whose query ASTs have @filters added.

For each pair of parent and child SubQueryNodes, a new @filter directive will be added
in the child AST. It will be added on the field whose @output directive has the out_name
equal to the child's out name as specified in the QueryConnection. The newly added @filter
will be an 'in_collection' type filter, and the name of the local variable is guaranteed to
be the same as the out_name of the @output on the parent.

ASTs contained in the input node and its children nodes will not be modified.

Args:
root_sub_query_node: representing the base of a query split into pieces
that we want to turn into a query plan.
intermediate_output_names: names of outputs to be removed at the end.

Returns:
QueryPlanDescriptor containing a tree of SubQueryPlans that wrap around each individual
query AST, the set of intermediate output names that are to be removed at the end, and
information on which outputs are to be connected to which in what manner.
"""
output_join_descriptors: List[OutputJoinDescriptor] = []

root_sub_query_plan = SubQueryPlan(
query_ast=root_sub_query_node.query_ast,
schema_id=root_sub_query_node.schema_id,
parent_query_plan=None,
child_query_plans=[],
)

_make_query_plan_recursive(root_sub_query_node, root_sub_query_plan, output_join_descriptors)

return QueryPlanDescriptor(
root_sub_query_plan=root_sub_query_plan,
intermediate_output_names=intermediate_output_names,
output_join_descriptors=output_join_descriptors,
)
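
To make the make_query_plan docstring concrete: the child field whose @output out_name matches the child's out name in the QueryConnection gains an in_collection filter whose variable is named after the parent's out_name. A hypothetical before/after, with invented type, field, and output names:

```python
# Child sub-query before make_query_plan runs (illustrative schema):
child_query_before = """{
    Animal {
        uuid @output(out_name: "child_uuid")
    }
}"""

# After make_query_plan, the same field also carries the injected filter; the local
# variable "$parent_uuid" matches the out_name of the @output on the parent query.
child_query_after = """{
    Animal {
        uuid @output(out_name: "child_uuid")
             @filter(op_name: "in_collection", value: ["$parent_uuid"])
    }
}"""
```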


def print_query_plan(query_plan_descriptor: QueryPlanDescriptor, indentation_depth: int = 4) -> str:
"""Return a string describing query plan."""
query_plan_strings = [""]
@@ -311,17 +459,3 @@ def print_query_plan(query_plan_descriptor: QueryPlanDescriptor, indentation_dep
query_plan_strings.append(str(query_plan_descriptor.intermediate_output_names) + "\n")

return "".join(query_plan_strings)


def _get_plan_and_depth_in_dfs_order(query_plan: SubQueryPlan) -> List[Tuple[SubQueryPlan, int]]:
"""Return a list of topologically sorted (query plan, depth) tuples."""

def _get_plan_and_depth_in_dfs_order_helper(query_plan, depth):
plan_and_depth_in_dfs_order = [(query_plan, depth)]
for child_query_plan in query_plan.child_query_plans:
plan_and_depth_in_dfs_order.extend(
_get_plan_and_depth_in_dfs_order_helper(child_query_plan, depth + 1)
)
return plan_and_depth_in_dfs_order

return _get_plan_and_depth_in_dfs_order_helper(query_plan, 0)
@@ -0,0 +1,50 @@
# Copyright 2021-present Kensho Technologies, LLC.
from typing import Any, Callable, ContextManager, Dict, Iterable, Mapping

from ..post_processing.sql_post_processing import post_process_mssql_folds
from ..query_formatting.common import deserialize_multiple_arguments
from .typedefs import QueryExecutionFunc, QueryPlan, SimpleExecute


def _execute_simple_execute_node(
provider_client_makers: Dict[str, Callable[[], ContextManager[QueryExecutionFunc]]],
query_plan_node: SimpleExecute,
) -> Iterable[Mapping[str, Any]]:
"""Execute a SimpleExecute."""
provider_client_maker = provider_client_makers[query_plan_node.provider_id]
arguments = deserialize_multiple_arguments(
query_plan_node.arguments, query_plan_node.input_metadata
)
requires_postprocessing = query_plan_node.provider_metadata.requires_fold_postprocessing

with provider_client_maker() as query_client:
result = query_client(query_plan_node.query, query_plan_node.input_metadata, arguments)

# Apply post processing for MSSQL folds if applicable.
if requires_postprocessing:
list_dict_result = [dict(entry) for entry in result]
post_process_mssql_folds(list_dict_result, query_plan_node.output_metadata)
return list_dict_result

# Otherwise, return result as is.
return result


######
# Public API
######


def execute_query_plan(
provider_client_makers: Dict[str, Callable[[], ContextManager[QueryExecutionFunc]]],
query_plan: QueryPlan,
) -> Iterable[Mapping[str, Any]]:
"""Execute a QueryPlan."""
# TODO(selene): figure out if we want some sort of class to contain provider_client_makers
if isinstance(query_plan.plan_root_node, SimpleExecute):
return _execute_simple_execute_node(provider_client_makers, query_plan.plan_root_node)
else:
raise NotImplementedError(
f"Received plan_root_node with unsupported type "
f"{type(query_plan.plan_root_node)}: {query_plan.plan_root_node}"
)
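
For context on the provider_client_makers argument: each value is a zero-argument callable returning a context manager that yields a query-execution function with the shape used above, (query, input_metadata, arguments) -> rows. A minimal hypothetical wiring for a SQLAlchemy-backed provider (the engine URL, provider id, and row handling are assumptions, not part of this PR):

```python
from contextlib import contextmanager
from typing import Any, Iterable, Iterator, Mapping

import sqlalchemy


def make_sqlalchemy_client_maker(engine: sqlalchemy.engine.Engine):
    """Build one provider client maker: a zero-arg callable returning a context manager."""

    @contextmanager
    def client_maker() -> Iterator[Any]:
        with engine.connect() as connection:

            def run_query(
                query: str, input_metadata: Mapping[str, Any], arguments: Mapping[str, Any]
            ) -> Iterable[Mapping[str, Any]]:
                # input_metadata is unused here; a real client might use it for type coercion.
                return [dict(row) for row in connection.execute(sqlalchemy.text(query), arguments)]

            yield run_query

    return client_maker


# Placeholder engine URL and provider id; query_plan would come from generate_query_plan(...).
engine = sqlalchemy.create_engine("postgresql://localhost/example")
provider_client_makers = {"example_provider": make_sqlalchemy_client_maker(engine)}
# results = execute_query_plan(provider_client_makers, query_plan)
```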