Skip to content

Commit 674e39f

Browse files
authored
[chore] Rewrite multiline list comprehensions as for-loops and enforce it as a linter rule (#2820)
Improve readability of parts of the codebase
1 parent 352126d commit 674e39f

File tree

21 files changed

+143
-151
lines changed

21 files changed

+143
-151
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ dependencies = [
7676
"mypy~=1.9.0",
7777
"pylint~=3.3.1",
7878
"pylint-pytest==2.0.0a0",
79-
"databricks-labs-pylint~=0.4.0",
79+
"databricks-labs-pylint~=0.5",
8080
"databricks-labs-pytester>=0.2.1",
8181
"pytest~=8.3.3",
8282
"pytest-cov~=4.1.0",
@@ -241,6 +241,7 @@ limit-inference-results = 100
241241
load-plugins = [
242242
"databricks.labs.pylint.mocking",
243243
"databricks.labs.pylint.eradicate",
244+
"databricks.labs.pylint.readability",
244245
"pylint_pytest",
245246
"pylint.extensions.bad_builtin",
246247
"pylint.extensions.broad_try_clause",

src/databricks/labs/ucx/cli.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -550,11 +550,14 @@ def revert_cluster_remap(w: WorkspaceClient, prompts: Prompts):
550550
"""Reverting Re-mapping of clusters from UC"""
551551
logger.info("Reverting the Remapping of the Clusters from UC")
552552
ctx = WorkspaceContext(w)
553-
cluster_ids = [
554-
cluster_files.path.split("/")[-1].split(".")[0]
555-
for cluster_files in ctx.installation.files()
556-
if cluster_files.path is not None and cluster_files.path.find("backup/clusters") > 0
557-
]
553+
cluster_ids = []
554+
for cluster_files in ctx.installation.files():
555+
if cluster_files.path is None:
556+
continue
557+
if cluster_files.path.find("backup/clusters") <= 0:
558+
continue
559+
cluster_id = cluster_files.path.split("/")[-1].split(".")[0]
560+
cluster_ids.append(cluster_id)
558561
if not cluster_ids:
559562
logger.info("There is no cluster files in the backup folder. Skipping the reverting process")
560563
return

src/databricks/labs/ucx/hive_metastore/grants.py

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -623,9 +623,12 @@ def _get_privilege(self, table: Table, locations: dict[str, str], mounts: list[M
623623

624624
def _get_database_grants(self, tables: list[Table], principals: list[str]) -> list[Grant]:
625625
databases = {table.database for table in tables}
626-
return [
627-
Grant(principal, "USAGE", "hive_metastore", database) for database in databases for principal in principals
628-
]
626+
grants = []
627+
for database in databases:
628+
for principal in principals:
629+
grant = Grant(principal, "USAGE", "hive_metastore", database)
630+
grants.append(grant)
631+
return grants
629632

630633
def _get_grants(
631634
self, locations: dict[str, str], principals: list[str], tables: list[Table], mounts: list[Mount]
@@ -635,33 +638,21 @@ def _get_grants(
635638
for table in tables:
636639
privilege = self._get_privilege(table, locations, mounts)
637640
if privilege == "READ_FILES":
638-
grants.extend(
639-
[Grant(principal, "SELECT", table.catalog, table.database, table.name) for principal in principals]
640-
)
641+
for principal in principals:
642+
grants.append(Grant(principal, "SELECT", table.catalog, table.database, table.name))
641643
filtered_tables.append(table)
642644
continue
643645
if privilege == "WRITE_FILES":
644-
grants.extend(
645-
[
646-
Grant(principal, "ALL PRIVILEGES", table.catalog, table.database, table.name)
647-
for principal in principals
648-
]
649-
)
646+
for principal in principals:
647+
grants.append(Grant(principal, "ALL PRIVILEGES", table.catalog, table.database, table.name))
650648
filtered_tables.append(table)
651649
continue
652650
if table.view_text is not None:
653-
grants.extend(
654-
[
655-
Grant(principal, "ALL PRIVILEGES", table.catalog, table.database, view=table.name)
656-
for principal in principals
657-
]
658-
)
651+
for principal in principals:
652+
grants.append(Grant(principal, "ALL PRIVILEGES", table.catalog, table.database, view=table.name))
659653
filtered_tables.append(table)
660-
661654
database_grants = self._get_database_grants(filtered_tables, principals)
662-
663655
grants.extend(database_grants)
664-
665656
return grants
666657

667658
def _get_cluster_principal_mapping(self, cluster_id: str, object_type: str) -> list[str]:

src/databricks/labs/ucx/installer/logs.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,11 @@ def _record_task(self, task_name: str, log: TextIO, log_creation_timestamp: dt.d
139139
log (TextIO): The task log
140140
log_creation_timestamp (datetime.datetime): The log creation timestamp.
141141
"""
142-
log_records = [
143-
LogRecord(
142+
log_records = []
143+
for partial_log_record in parse_logs(log):
144+
if logging.getLevelName(partial_log_record.level) < logging.WARNING:
145+
continue
146+
log_record = LogRecord(
144147
timestamp=int(
145148
log_creation_timestamp.replace(
146149
hour=partial_log_record.time.hour,
@@ -156,9 +159,7 @@ def _record_task(self, task_name: str, log: TextIO, log_creation_timestamp: dt.d
156159
component=partial_log_record.component,
157160
message=partial_log_record.message,
158161
)
159-
for partial_log_record in parse_logs(log)
160-
if logging.getLevelName(partial_log_record.level) >= logging.WARNING
161-
]
162+
log_records.append(log_record)
162163
return log_records
163164

164165
def snapshot(self) -> list[LogRecord]:

src/databricks/labs/ucx/source_code/graph.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -91,17 +91,12 @@ def register_dependency(self, dependency: Dependency) -> MaybeGraph:
9191
if not container:
9292
problem = DependencyProblem('dependency-register-failed', 'Failed to register dependency', dependency.path)
9393
return MaybeGraph(child_graph, [problem])
94-
problems = container.build_dependency_graph(child_graph)
95-
return MaybeGraph(
96-
child_graph,
97-
[
98-
dataclasses.replace(
99-
problem,
100-
source_path=dependency.path if problem.is_path_missing() else problem.source_path,
101-
)
102-
for problem in problems
103-
],
104-
)
94+
problems = []
95+
for problem in container.build_dependency_graph(child_graph):
96+
if problem.is_path_missing():
97+
problem = dataclasses.replace(problem, source_path=dependency.path)
98+
problems.append(problem)
99+
return MaybeGraph(child_graph, problems)
105100

106101
def locate_dependency(self, path: Path) -> MaybeGraph:
107102
# need a list since unlike JS, Python won't let you assign closure variables

src/databricks/labs/ucx/source_code/notebooks/cells.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -103,16 +103,17 @@ def is_runnable(self) -> bool:
103103
def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
104104
context = parent.new_dependency_graph_context()
105105
analyzer = PythonCodeAnalyzer(context, self._original_code)
106-
python_dependency_problems = analyzer.build_graph()
107-
# Position information for the Python code is within the code and needs to be mapped to the location within the parent notebook.
108-
return [
109-
dataclasses.replace(
106+
# Position information for the Python code is within the code and needs to be mapped to the location within
107+
# the parent notebook.
108+
problems = []
109+
for problem in analyzer.build_graph():
110+
problem = dataclasses.replace(
110111
problem,
111112
start_line=self.original_offset + problem.start_line,
112113
end_line=self.original_offset + problem.end_line,
113114
)
114-
for problem in python_dependency_problems
115-
]
115+
problems.append(problem)
116+
return problems
116117

117118
def build_inherited_context(self, graph: DependencyGraph, child_path: Path) -> InheritedContext:
118119
context = graph.new_dependency_graph_context()
@@ -178,11 +179,17 @@ def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProb
178179
path, idx, line = self._read_notebook_path()
179180
if path is not None:
180181
start_line = self._original_offset + idx
181-
problems = parent.register_notebook(path, True)
182-
return [
183-
dataclasses.replace(problem, start_line=start_line, start_col=0, end_line=start_line, end_col=len(line))
184-
for problem in problems
185-
]
182+
problems = []
183+
for problem in parent.register_notebook(path, True):
184+
problem = dataclasses.replace(
185+
problem,
186+
start_line=start_line,
187+
start_col=0,
188+
end_line=start_line,
189+
end_col=len(line),
190+
)
191+
problems.append(problem)
192+
return problems
186193
start_line = self._original_offset
187194
problem = DependencyProblem(
188195
'invalid-run-cell',

src/databricks/labs/ucx/source_code/queries.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,15 @@ def refresh_report(self, sql_backend: SqlBackend, inventory_database: str):
7878
)
7979
# dump dfsas
8080
assessment_end = datetime.now(timezone.utc)
81-
all_dfsas = [
82-
dataclasses.replace(
83-
dfsa, assessment_start_timestamp=assessment_start, assessment_end_timestamp=assessment_end
81+
processed = []
82+
for dfsa in all_dfsas:
83+
dfsa = dataclasses.replace(
84+
dfsa,
85+
assessment_start_timestamp=assessment_start,
86+
assessment_end_timestamp=assessment_end,
8487
)
85-
for dfsa in all_dfsas
86-
]
87-
self._directfs_crawler.dump_all(all_dfsas)
88+
processed.append(dfsa)
89+
self._directfs_crawler.dump_all(processed)
8890

8991
def _dashboard_ids_in_scope(self) -> list[str]:
9092
if self._include_dashboard_ids is not None: # an empty list is accepted

src/databricks/labs/ucx/workspace_access/clusters.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,13 @@ def __init__(self, installation: Installation, ws: WorkspaceClient, prompts: Pro
1616
self._installation = installation
1717

1818
def list_cluster(self):
19-
clusters = [
20-
clusters
21-
for clusters in self._ws.clusters.list()
22-
if clusters.cluster_source is not None and clusters.cluster_source.name != "JOB"
23-
]
19+
clusters = []
20+
for cluster_info in self._ws.clusters.list():
21+
if cluster_info.cluster_source is None:
22+
continue
23+
if cluster_info.cluster_source.name == "JOB":
24+
continue
25+
clusters.append(cluster_info)
2426
return clusters
2527

2628
def _get_access_mode(self, access_mode: str):

src/databricks/labs/ucx/workspace_access/generic.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -414,16 +414,21 @@ def inner() -> Iterator[ml.ModelDatabricks]:
414414

415415

416416
def experiments_listing(ws: WorkspaceClient):
417+
def _get_repo_nb_tag(experiment):
418+
repo_nb_tag = []
419+
for tag in experiment.tags:
420+
if tag.key == "mlflow.experiment.sourceType" and tag.value == "REPO_NOTEBOOK":
421+
repo_nb_tag.append(tag)
422+
return repo_nb_tag
423+
417424
def inner() -> Iterator[ml.Experiment]:
418425
for experiment in ws.experiments.list_experiments():
419426
# We filter-out notebook-based experiments, because they are covered by notebooks listing in
420427
# workspace-based notebook experiment
421428
if experiment.tags:
422429
nb_tag = [t for t in experiment.tags if t.key == "mlflow.experimentType" and t.value == "NOTEBOOK"]
423430
# repo-based notebook experiment
424-
repo_nb_tag = [
425-
t for t in experiment.tags if t.key == "mlflow.experiment.sourceType" and t.value == "REPO_NOTEBOOK"
426-
]
431+
repo_nb_tag = _get_repo_nb_tag(experiment)
427432
if nb_tag or repo_nb_tag:
428433
continue
429434

src/databricks/labs/ucx/workspace_access/groups.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -857,11 +857,10 @@ def _check_for_deleted_workspace_groups(
857857
) -> None:
858858
attributes = "id,displayName"
859859
expected_deletions = {group.id_in_workspace for group in deleted_workspace_groups}
860-
pending_deletions = [
861-
GroupDeletionIncompleteError(group.id, group.display_name)
862-
for group in self._list_workspace_groups("WorkspaceGroup", attributes)
863-
if group.id in expected_deletions
864-
]
860+
pending_deletions = []
861+
for group in self._list_workspace_groups("WorkspaceGroup", attributes):
862+
if group.id in expected_deletions:
863+
pending_deletions.append(GroupDeletionIncompleteError(group.id, group.display_name))
865864
if pending_deletions:
866865
if logger.isEnabledFor(still_present_log_level):
867866
logger.log(

0 commit comments

Comments
 (0)