Adding initial changes for aggregation example in quickstart code.

patryk-wyzgowski · patryk-wyzgowski · commit db3b53c48eda · 2024-06-20T10:51:27.000+02:00
diff --git a/docs/quickstart/quickstart_code.py b/docs/quickstart/quickstart_code.py
@@ -1,4 +1,6 @@
 # pylint: disable=missing-return-doc, missing-param-doc, missing-function-docstring
+from typing import Union, Tuple, Any
+
 import dbally
 import asyncio
 
@@ -54,14 +56,20 @@ def from_country(self, country: str) -> sqlalchemy.ColumnElement:
         """
         return Candidate.country == country
 
+    @decorators.view_aggregation()
+    def group_by_university(self, aggregation:str): # -> Union[Select[Tuple[Any, Any]], Select]:  # pylint: disable=W0602, C0116, W9011
+        return sqlalchemy.select(Candidate.university, sqlalchemy.func.count(Candidate.university).label("count")) \
+            .group_by(Candidate.university)  # NOTE(review): the `aggregation` argument is never used - confirm
+
 
 async def main():
     llm = LiteLLM(model_name="gpt-3.5-turbo")
 
     collection = dbally.create_collection("recruitment", llm, event_handlers=[CLIEventHandler()])
     collection.add(CandidateView, lambda: CandidateView(engine))
 
-    result = await collection.ask("Find me French candidates suitable for a senior data scientist position.")
+    # result = await collection.ask("Find me French candidates suitable for a senior data scientist position.")
+    result = await collection.ask("Could you count the candidates university-wise and present the rows?")
 
     print(f"The generated SQL query is: {result.context.get('sql')}")
     print()
diff --git a/src/dbally/iql_generator/iql_generator.py b/src/dbally/iql_generator/iql_generator.py
@@ -38,11 +38,12 @@ def __init__(
         """
         self._llm = llm
         self._prompt_template = prompt_template or copy.deepcopy(default_iql_template)
-        self._promptify_view = promptify_view or _promptify_filters
+        self._promptify_view = promptify_view or _promptify_filters or _promptify_aggregations
 
     async def generate_iql(
         self,
         filters: List[ExposedFunction],
+        aggregations: List[ExposedFunction],
         question: str,
         event_tracker: EventTracker,
         conversation: Optional[IQLPromptTemplate] = None,
@@ -62,12 +63,14 @@ async def generate_iql(
             IQL - iql generated based on the user question
         """
         filters_for_prompt = self._promptify_view(filters)
+        aggregations_for_prompt = self._promptify_view(aggregations)
 
         template = conversation or self._prompt_template
 
         llm_response = await self._llm.generate_text(
             template=template,
-            fmt={"filters": filters_for_prompt, "question": question},
+            fmt={"filters": filters_for_prompt, "question": question,
+                 "aggregation": aggregations_for_prompt},
             event_tracker=event_tracker,
             options=llm_options,
         )
@@ -114,3 +117,19 @@ def _promptify_filters(
     """
     filters_for_prompt = "\n".join([str(filter) for filter in filters])
     return filters_for_prompt
+
+
+def _promptify_aggregations(
+    aggregations: List[ExposedFunction],
+) -> str:
+    """
+    Formats aggregations for prompt, one `str()` rendering per line
+
+    Args:
+        aggregations: list of aggregations exposed by the view
+
+    Returns:
+        aggregations_for_prompt: aggregations formatted for prompt
+    """
+    aggregations_for_prompt = "\n".join([str(aggregation) for aggregation in aggregations])
+    return aggregations_for_prompt
diff --git a/src/dbally/nl_responder/nl_responder_prompt_template.py b/src/dbally/nl_responder/nl_responder_prompt_template.py
@@ -24,7 +24,7 @@ def __init__(
         """
 
         super().__init__(chat, response_format, llm_response_parser)
-        self.chat = check_prompt_variables(chat, {"rows", "question"})
+        self.chat = check_prompt_variables(chat, {"rows", "question", "aggregation"})
 
 
 default_nl_responder_template = NLResponderPromptTemplate(
@@ -34,7 +34,7 @@ def __init__(
             "content": "You are a helpful assistant that helps answer the user's questions "
             "based on the table provided. You MUST use the table to answer the question. "
             "You are very intelligent and obedient.\n"
-            "The table ALWAYS contains full answer to a question.\n"
+            "The table ALWAYS contains full answer to a question including necessary {aggregation}.\n"
             "Answer the question in a way that is easy to understand and informative.\n"
             "DON'T MENTION using a table in your answer.",
         },
diff --git a/src/dbally/nl_responder/query_explainer_prompt_template.py b/src/dbally/nl_responder/query_explainer_prompt_template.py
@@ -21,7 +21,7 @@ def __init__(
         llm_response_parser: Callable = lambda x: x,
     ) -> None:
         super().__init__(chat, response_format, llm_response_parser)
-        self.chat = check_prompt_variables(chat, {"question", "query", "number_of_results"})
+        self.chat = check_prompt_variables(chat, {"question", "query", "aggregation", "number_of_results"})
 
 
 default_query_explainer_template = QueryExplainerPromptTemplate(
@@ -34,14 +34,14 @@ def __init__(
             "Your task is to provide natural language description of the table used by the logical query "
             "to the database.\n"
             "Describe the table in a way that is short and informative.\n"
-            "Make your answer as short as possible, start it by infroming the user that the underlying "
+            "Make your answer as short as possible, start it by informing the user that the underlying "
             "data is too long to print and then describe the table based on the question and the query.\n"
             "DON'T MENTION using a query in your answer.\n",
         },
         {
             "role": "user",
             "content": "The query below represents the answer to a question: {question}.\n"
-            "Describe the table generated using this query: {query}.\n"
+            "Describe the table generated using this query: {query} which applies {aggregation}.\n"
             "Number of results to this query: {number_of_results}.\n",
         },
     )
diff --git a/src/dbally/view_selection/llm_view_selector.py b/src/dbally/view_selection/llm_view_selector.py
@@ -35,7 +35,7 @@ def __init__(
         """
         self._llm = llm
         self._prompt_template = prompt_template or copy.deepcopy(default_view_selector_template)
-        self._promptify_views = promptify_views or _promptify_views
+        self._promptify_views = promptify_views or _promptify_views or _promptify_aggregations
 
     async def select_view(
         self,
@@ -81,3 +81,17 @@ def _promptify_views(views: Dict[str, str]) -> str:
     """
 
     return "\n".join([f"{name}: {description}" for name, description in views.items()])
+
+
+def _promptify_aggregations(views: Dict[str, str]) -> str:
+    """
+    Formats views for the prompt as one "name: description" line per view
+
+    Args:
+        views: dictionary of available view names with corresponding descriptions.
+
+    Returns:
+        views_for_prompt: views formatted for prompt
+    """
+    # NOTE(review): formats exactly like _promptify_views; takes views, not aggregations - confirm intent
+    return "\n".join([f"{name}: {description}" for name, description in views.items()])
diff --git a/src/dbally/views/decorators.py b/src/dbally/views/decorators.py
@@ -14,3 +14,17 @@ def wrapped(func: typing.Callable) -> typing.Callable:  # pylint: disable=missin
         return func
 
     return wrapped
+
+def view_aggregation() -> typing.Callable:
+    """
+    Decorator for marking a method as an aggregation by setting its `_methodDecorator` attribute
+
+    Returns:
+        Function that tags the method it receives and returns that same method
+    """
+
+    def wrapped(func: typing.Callable) -> typing.Callable:  # pylint: disable=missing-return-doc
+        func._methodDecorator = view_aggregation  # type:ignore # pylint: disable=protected-access
+        return func
+
+    return wrapped
diff --git a/src/dbally/views/sqlalchemy_base.py b/src/dbally/views/sqlalchemy_base.py
@@ -64,6 +64,26 @@ async def _build_filter_bool_op(self, bool_op: syntax.BoolOp) -> sqlalchemy.Colu
             return alchemy_op(await self._build_filter_node(bool_op.child))
         raise ValueError(f"BoolOp {bool_op} has no children")
 
+    async def _build_aggregation_node(self, node: syntax.Node) -> sqlalchemy.ColumnElement:
+        """
+        Converts an aggregation node from the IQLQuery to a SQLAlchemy expression using the filter helpers.
+        """
+        if isinstance(node, syntax.BoolOp):
+            return await self._build_filter_bool_op(node)  # reuses the filter bool-op builder
+        if isinstance(node, syntax.FunctionCall):
+            return await self.call_filter_method(node)  # NOTE(review): filter call path - confirm for aggregations
+
+        raise ValueError(f"Unsupported grammar: {node}")
+
+    async def apply_aggregation(self, aggregation: IQLQuery) -> None:
+        """
+        Applies the chosen aggregation to the view as a WHERE criterion of the current select.
+
+        Args:
+            aggregation: IQLQuery object representing the aggregation to apply
+        """
+        self._select = self._select.where(await self._build_aggregation_node(aggregation.root))
+
     def execute(self, dry_run: bool = False) -> ViewExecutionResult:
         """
         Executes the generated SQL query and returns the results.
diff --git a/src/dbally/views/structured.py b/src/dbally/views/structured.py
@@ -58,6 +58,7 @@ async def ask(
         """
         iql_generator = self.get_iql_generator(llm)
         filter_list = self.list_filters()
+        aggregation_list = self.list_aggregations()
 
         iql_filters, conversation = await iql_generator.generate_iql(
             question=query,
@@ -104,6 +105,23 @@ async def apply_filters(self, filters: IQLQuery) -> None:
             filters: [IQLQuery](../../concepts/iql.md) object representing the filters to apply
         """
 
+    @abc.abstractmethod
+    def list_aggregations(self) -> List[ExposedFunction]:
+        """Lists the aggregations defined inside the View.
+
+        Returns:
+            Aggregations defined inside the View.
+        """
+
+    @abc.abstractmethod
+    async def apply_aggregations(self, filters: IQLQuery) -> None:
+        """
+        Applies the chosen aggregations to the view.
+
+        Args:
+            filters: [IQLQuery](../../concepts/iql.md) object representing the aggregations to apply
+        """
+
     @abc.abstractmethod
     def execute(self, dry_run: bool = False) -> ViewExecutionResult:
         """