def add_function(self, source_file: SourceFile, function: FunctionNode):
    """Register a function node in the manifest and record it on its source file.

    The duplicate check runs before any state is touched.  After it, the node is
    stored in ``self.functions`` under its unique id and that same id is appended
    to ``source_file.functions``.
    """
    # NOTE(review): presumably rejects a unique_id that is already registered --
    # confirm against `_check_duplicates`.
    _check_duplicates(function, self.functions)

    unique_id = function.unique_id
    self.functions[unique_id] = function
    # Remember which file produced this function.
    source_file.functions.append(unique_id)
# overrides SimpleSQLParser.add_result_node
def add_result_node(self, block: FileBlock, node: ManifestNode):
    """Store a parsed function node in the manifest.

    Enabled nodes are added through ``manifest.add_function``; nodes whose config
    is not enabled go through ``manifest.add_disabled``.  In both cases the file
    the block came from is passed along with the node.
    """
    assert isinstance(node, FunctionNode), "Got non FunctionNode in FunctionParser"

    source_file = block.file
    # Narrow the file type before handing it to the manifest.
    assert isinstance(source_file, SourceFile)

    if not node.config.enabled:
        self.manifest.add_disabled(source_file, node)
        return

    self.manifest.add_function(source_file, node)
def update_function_in_saved(
    self, new_source_file: SourceFile, old_source_file: SourceFile
) -> None:
    """Replace a changed function file in the saved state and queue it for parsing.

    Does nothing when ``already_scheduled_for_parsing(old_source_file)`` is truthy.
    Otherwise the old file's function node is deleted, a deep copy of the new file
    is stored in ``self.saved_files`` under its ``file_id``, and the new file is
    passed to ``add_to_pp_files``.
    """
    if self.already_scheduled_for_parsing(old_source_file):
        return
    # Drop the stale node before the new version of the file is registered.
    self.delete_function_node(old_source_file)
    self.saved_files[new_source_file.file_id] = deepcopy(new_source_file)
    self.add_to_pp_files(new_source_file)


def delete_function_node(self, source_file: SourceFile):
    """Remove a function file's node from the saved manifest and schedule re-parsing.

    Returns without doing anything when ``source_file`` is not a ``SourceFile`` or
    has no entries in ``source_file.functions``.  Otherwise the first recorded
    function id is popped from ``self.saved_manifest.functions``, removed from
    ``source_file.functions``, and the popped node is handed to
    ``_schedule_for_parsing`` under the ``"functions"`` key.
    """
    # Nothing to delete if the file never recorded a function.
    if not isinstance(source_file, SourceFile) or not source_file.functions:
        return

    # There can only be one node of a function
    function_unique_id = source_file.functions[0]

    # Remove the function node from the saved manifest
    function_node = self.saved_manifest.functions.pop(function_unique_id)

    # Remove the function id from the source file so that it's not viewed as a
    # duplicate when it's re-added. The id lives in `source_file.functions` (the
    # list it was just read from, and the one `Manifest.add_function` appends to),
    # not in `source_file.nodes`; `list.remove` raises ValueError for a value that
    # is not present.
    source_file.functions.remove(function_unique_id)

    # schedule function for parsing.
    # Note: We _don't_ need to schedule referencing nodes for reparsing, because a
    # change in a function changes nothing for the parsing/compilation of a node
    # that references it. It only affects the referencer's execution in the data
    # warehouse.
    self._schedule_for_parsing(
        "functions",
        function_node,
        function_node.name,
        self.delete_schema_function,
    )


def delete_schema_function(self, schema_file: SchemaSourceFile, function_dict: dict):
    """Remove the saved function whose name matches a schema-file ``functions`` entry.

    ``function_dict`` must contain a ``"name"`` key.  Every unique id listed in
    ``schema_file.node_patches`` that is present in ``self.saved_manifest.functions``
    and whose node has that name is popped from the saved manifest and removed
    from ``schema_file.functions``.
    """
    function_name = function_dict["name"]
    # Snapshot of the patched ids, taken before any entry is removed.
    # NOTE(review): candidates come from `node_patches` while the removal below
    # targets `schema_file.functions` -- confirm both lists carry the id, since
    # `list.remove` raises ValueError when the value is absent.
    functions = schema_file.node_patches.copy()
    for unique_id in functions:
        if unique_id in self.saved_manifest.functions:
            function = self.saved_manifest.functions[unique_id]
            if function.name == function_name:
                self.saved_manifest.functions.pop(unique_id)
                schema_file.functions.remove(unique_id)
class TestPartialParsingFunctions:
    """Parse a project with one function, editing its SQL and YAML between runs.

    After each ``dbt parse`` the manifest must contain exactly one function whose
    code and properties reflect the files written so far.  A final parse with no
    changes must emit the "nothing changed" note.
    """

    @pytest.fixture(scope="class")
    def functions(self):
        # Initial file contents: the function body and its YAML properties.
        return {"my_func.sql": my_func_sql, "my_func.yml": my_func_yml}

    @staticmethod
    def _parse(**run_kwargs):
        """Run ``dbt parse`` and return the manifest, which must hold one function."""
        parsed = run_dbt(["parse"], **run_kwargs)
        assert isinstance(parsed, Manifest)
        assert len(parsed.functions) == 1
        return parsed

    @staticmethod
    def _assert_function(parsed, raw_code, description, argument, returns):
        """Check the code and properties of ``my_func`` in a parsed manifest."""
        node = parsed.functions["function.test.my_func"]
        assert node.raw_code == raw_code
        assert node.description == description
        assert node.arguments == [argument]
        assert node.returns == returns

    def test_pp_functions(self, project):
        int_argument = FunctionArgument(
            name="value", data_type="int", description="An integer to be doubled"
        )
        int_returns = FunctionReturns(data_type="int")
        float_argument = FunctionArgument(
            name="number", data_type="float", description="A float to be doubled"
        )
        float_returns = FunctionReturns(data_type="float")

        # initial run
        self._assert_function(
            self._parse(), "value * 2", "Doubles an integer", int_argument, int_returns
        )

        # update sql: only the raw code changes, the YAML properties stay as they were
        write_file(updated_my_func_sql, project.project_root, "functions", "my_func.sql")
        self._assert_function(
            self._parse(), "number * 2.0", "Doubles an integer", int_argument, int_returns
        )

        # update yml: the properties now follow the new YAML
        write_file(updated_my_func_yml, project.project_root, "functions", "my_func.yml")
        self._assert_function(
            self._parse(), "number * 2.0", "Doubles a float", float_argument, float_returns
        )

        # if we parse again, partial parsing should be skipped
        note_catcher = EventCatcher(Note)
        self._parse(callbacks=[note_catcher.catch])
        caught = note_catcher.caught_events
        assert len(caught) == 1
        assert caught[0].info.msg == "Nothing changed, skipping partial parsing."