databrickslabs
diff --git a/‎src/databricks/labs/ucx/source_code/linters/pyspark.py‎
Lines changed: 0 additions & 1 deletion b/‎src/databricks/labs/ucx/source_code/linters/pyspark.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎tests/unit/source_code/linters/test_pyspark.py‎
Lines changed: 3 additions & 206 deletions b/‎tests/unit/source_code/linters/test_pyspark.py‎
Lines changed: 3 additions & 206 deletions
diff --git a/‎tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-cache-table.py‎
Lines changed: 25 additions & 0 deletions b/‎tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-cache-table.py‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-external-table.py‎
Lines changed: 41 additions & 0 deletions b/‎tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-external-table.py‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-table.py‎
Lines changed: 41 additions & 0 deletions b/‎tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-table.py‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-get-table.py‎
Lines changed: 30 additions & 0 deletions b/‎tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-get-table.py‎
Lines changed: 30 additions & 0 deletions
@@ -235,7 +235,6 @@ def __init__(self):
             TableNameMatcher("createTable", 1, 1000, 0, "tableName"),
             TableNameMatcher("createExternalTable", 1, 1000, 0, "tableName"),
             TableNameMatcher("getTable", 1, 1, 0),
-            TableNameMatcher("table", 1, 1, 0),
             TableNameMatcher("isCached", 1, 1, 0),
             TableNameMatcher("listColumns", 1, 2, 0, "tableName"),
             TableNameMatcher("tableExists", 1, 2, 0, "tableName"),
 
@@ -2,8 +2,8 @@
 
 import pytest
 
-from databricks.labs.ucx.source_code.base import Advisory, Deprecation, CurrentSessionState
-from databricks.labs.ucx.source_code.linters.pyspark import SparkMatchers, SparkSql, AstHelper, TableNameMatcher
+from databricks.labs.ucx.source_code.base import Deprecation, CurrentSessionState
+from databricks.labs.ucx.source_code.linters.pyspark import SparkSql, AstHelper, TableNameMatcher
 from databricks.labs.ucx.source_code.queries import FromTable
 
 
@@ -87,214 +87,11 @@ def test_spark_sql_match_named(migration_index):
     ] == list(sqf.lint(old_code))
 
 
-METHOD_NAMES = [
-    "cacheTable",
-    "createTable",
-    "createExternalTable",
-    "getTable",
-    "isCached",
-    "listColumns",
-    "tableExists",
-    "recoverPartitions",
-    "refreshTable",
-    "uncacheTable",
-    "table",
-    "insertInto",
-    "saveAsTable",
-]
-
-
-@pytest.mark.parametrize("method_name", METHOD_NAMES)
-def test_spark_table_match(migration_index, method_name):
-    spark_matchers = SparkMatchers()
-    ftf = FromTable(migration_index, CurrentSessionState())
-    sqf = SparkSql(ftf, migration_index)
-    matcher = spark_matchers.matchers[method_name]
-    args_list = ["a"] * min(5, matcher.max_args)
-    args_list[matcher.table_arg_index] = '"old.things"'
-    args = ",".join(args_list)
-    old_code = f"""
-spark.read.csv("s3://bucket/path")
-for i in range(10):
-    df = spark.{method_name}({args})
-    do_stuff_with_df(df)
-"""
-    assert [
-        Deprecation(
-            code='direct-filesystem-access',
-            message='The use of direct filesystem references is deprecated: ' 's3://bucket/path',
-            start_line=2,
-            start_col=0,
-            end_line=2,
-            end_col=34,
-        ),
-        Deprecation(
-            code='table-migrate',
-            message='Table old.things is migrated to brand.new.stuff in Unity Catalog',
-            start_line=4,
-            start_col=9,
-            end_line=4,
-            end_col=17 + len(method_name) + len(args),
-        ),
-    ] == list(sqf.lint(old_code))
-
-
-@pytest.mark.parametrize("method_name", METHOD_NAMES)
-def test_spark_table_no_match(migration_index, method_name):
-    spark_matchers = SparkMatchers()
-    ftf = FromTable(migration_index, CurrentSessionState())
-    sqf = SparkSql(ftf, migration_index)
-    matcher = spark_matchers.matchers[method_name]
-    args_list = ["a"] * min(5, matcher.max_args)
-    args_list[matcher.table_arg_index] = '"table.we.know.nothing.about"'
-    args = ",".join(args_list)
-    old_code = f"""
-for i in range(10):
-    df = spark.{method_name}({args})
-    do_stuff_with_df(df)
-"""
-    assert not list(sqf.lint(old_code))
-
-
-@pytest.mark.parametrize("method_name", METHOD_NAMES)
-def test_spark_table_too_many_args(migration_index, method_name):
-    spark_matchers = SparkMatchers()
-    ftf = FromTable(migration_index, CurrentSessionState())
-    sqf = SparkSql(ftf, migration_index)
-    matcher = spark_matchers.matchers[method_name]
-    if matcher.max_args > 100:
-        return
-    args_list = ["a"] * (matcher.max_args + 1)
-    args_list[matcher.table_arg_index] = '"table.we.know.nothing.about"'
-    args = ",".join(args_list)
-    old_code = f"""
-for i in range(10):
-    df = spark.{method_name}({args})
-    do_stuff_with_df(df)
-"""
-    assert not list(sqf.lint(old_code))
-
-
-def test_spark_table_named_args(migration_index):
-    ftf = FromTable(migration_index, CurrentSessionState())
-    sqf = SparkSql(ftf, migration_index)
-    old_code = """
-spark.read.csv("s3://bucket/path")
-for i in range(10):
-    df = spark.saveAsTable(format="xyz", name="old.things")
-    do_stuff_with_df(df)
-"""
-    assert [
-        Deprecation(
-            code='direct-filesystem-access',
-            message='The use of direct filesystem references is deprecated: ' 's3://bucket/path',
-            start_line=2,
-            start_col=0,
-            end_line=2,
-            end_col=34,
-        ),
-        Deprecation(
-            code='table-migrate',
-            message='Table old.things is migrated to brand.new.stuff in Unity Catalog',
-            start_line=4,
-            start_col=9,
-            end_line=4,
-            end_col=59,
-        ),
-    ] == list(sqf.lint(old_code))
-
-
-def test_spark_table_variable_arg(migration_index):
-    ftf = FromTable(migration_index, CurrentSessionState())
-    sqf = SparkSql(ftf, migration_index)
-    old_code = """
-spark.read.csv("s3://bucket/path")
-for i in range(10):
-    df = spark.saveAsTable(name)
-    do_stuff_with_df(df)
-"""
-    assert [
-        Deprecation(
-            code='direct-filesystem-access',
-            message='The use of direct filesystem references is deprecated: ' 's3://bucket/path',
-            start_line=2,
-            start_col=0,
-            end_line=2,
-            end_col=34,
-        ),
-        Advisory(
-            code='table-migrate',
-            message="Can't migrate 'saveAsTable' because its table name argument is not a constant",
-            start_line=4,
-            start_col=9,
-            end_line=4,
-            end_col=32,
-        ),
-    ] == list(sqf.lint(old_code))
-
-
-def test_spark_table_fstring_arg(migration_index):
-    ftf = FromTable(migration_index, CurrentSessionState())
-    sqf = SparkSql(ftf, migration_index)
-    old_code = """
-spark.read.csv("s3://bucket/path")
-for i in range(10):
-    df = spark.saveAsTable(f"boop{stuff}")
-    do_stuff_with_df(df)
-"""
-    assert [
-        Deprecation(
-            code='direct-filesystem-access',
-            message='The use of direct filesystem references is deprecated: ' 's3://bucket/path',
-            start_line=2,
-            start_col=0,
-            end_line=2,
-            end_col=34,
-        ),
-        Advisory(
-            code='table-migrate',
-            message="Can't migrate 'saveAsTable' because its table name argument is not a constant",
-            start_line=4,
-            start_col=9,
-            end_line=4,
-            end_col=42,
-        ),
-    ] == list(sqf.lint(old_code))
-
-
-def test_spark_table_return_value(migration_index):
-    ftf = FromTable(migration_index, CurrentSessionState())
-    sqf = SparkSql(ftf, migration_index)
-    old_code = """
-spark.read.csv("s3://bucket/path")
-for table in spark.listTables():
-    do_stuff_with_table(table)
-"""
-    assert [
-        Deprecation(
-            code='direct-filesystem-access',
-            message='The use of direct filesystem references is deprecated: ' 's3://bucket/path',
-            start_line=2,
-            start_col=0,
-            end_line=2,
-            end_col=34,
-        ),
-        Advisory(
-            code='table-migrate',
-            message="Call to 'listTables' will return a list of <catalog>.<database>.<table> instead of <database>.<table>.",
-            start_line=3,
-            start_col=13,
-            end_line=3,
-            end_col=31,
-        ),
-    ] == list(sqf.lint(old_code))
-
-
 def test_spark_table_return_value_apply(migration_index):
     ftf = FromTable(migration_index, CurrentSessionState())
     sqf = SparkSql(ftf, migration_index)
     old_code = """spark.read.csv('s3://bucket/path')
-for table in spark.listTables():
+for table in spark.catalog.listTables():
     do_stuff_with_table(table)"""
     fixed_code = sqf.apply(old_code)
     # no transformations to apply, only lint messages
 
@@ -0,0 +1,25 @@
+## Check a literal reference to a known table that is migrated.
+# ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+spark.catalog.cacheTable("old.things")
+
+## Check a literal reference to an unknown table (that is not migrated); we expect no warning.
+spark.catalog.cacheTable("table.we.know.nothing.about")
+
+## Check that a call with too many positional arguments is ignored as (presumably) something else; we expect no warning.
+spark.catalog.cacheTable("old.things", None, "extra-argument")
+
+## Check a call with an out-of-position named argument referencing a table known to be migrated.
+# ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+spark.catalog.cacheTable(storageLevel=None, tableName="old.things")
+
+## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
+# ucx[table-migrate:+1:0:+1:0] Can't migrate 'cacheTable' because its table name argument is not a constant
+spark.catalog.cacheTable(name)
+# ucx[table-migrate:+1:0:+1:0] Can't migrate 'cacheTable' because its table name argument is not a constant
+spark.catalog.cacheTable(f"boop{stuff}")
+
+## Some trivial references to the method or table in unrelated contexts that should not trigger warnings.
+# FIXME: This is a false positive; any method named 'cacheTable' is triggering the warning.
+# ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+something_else.cacheTable("old.things")
+a_function("old.things")
@@ -0,0 +1,41 @@
+# ucx[direct-filesystem-access:+1:0:+1:0] The use of direct filesystem references is deprecated: s3://bucket/path
+spark.read.csv("s3://bucket/path")
+for i in range(10):
+
+    ## Check a literal reference to a known table that is migrated.
+    # ucx[table-migrate:+3:0:+3:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+    # TODO: Implement missing migration warning (on the source argument):
+    # #ucx[table-migrate:+1:0:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta
+    df = spark.catalog.createExternalTable("old.things")
+    do_stuff_with(df)
+
+    ## Check a literal reference to an unknown table (that is not migrated); we expect no warning.
+    # TODO: Implement missing migration warning (on the source argument):
+    # #ucx[table-migrate:+1:0:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta
+    df = spark.catalog.createExternalTable("table.we.know.nothing.about")
+    do_stuff_with(df)
+
+    ## Check that a call with too many positional arguments is ignored as (presumably) something else; we expect no warning.
+    # FIXME: This is a false positive due to an error in the matching specification; only 4 positional args are allowed.
+    # ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+    df = spark.catalog.createExternalTable("old.things", None, None, None, "extra-argument")
+    do_stuff_with(df)
+
+    ## Check a call with an out-of-position named argument referencing a table known to be migrated.
+    # ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+    df = spark.catalog.createExternalTable(path="foo", tableName="old.things", source="delta")
+    do_stuff_with(df)
+
+    ## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
+    # ucx[table-migrate:+1:0:+1:0] Can't migrate 'createExternalTable' because its table name argument is not a constant
+    df = spark.catalog.createExternalTable(name)
+    do_stuff_with(df)
+    # ucx[table-migrate:+1:0:+1:0] Can't migrate 'createExternalTable' because its table name argument is not a constant
+    df = spark.catalog.createExternalTable(f"boop{stuff}")
+    do_stuff_with(df)
+
+    ## Some trivial references to the method or table in unrelated contexts that should not trigger warnings.
+    # FIXME: This is a false positive; any method named 'createExternalTable' is triggering the warning.
+    # ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+    something_else.createExternalTable("old.things")
+    a_function("old.things")
@@ -0,0 +1,41 @@
+# ucx[direct-filesystem-access:+1:0:+1:0] The use of direct filesystem references is deprecated: s3://bucket/path
+spark.read.csv("s3://bucket/path")
+for i in range(10):
+
+    ## Check a literal reference to a known table that is migrated.
+    # ucx[table-migrate:+3:0:+3:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+    # TODO: Implement missing migration warning (on the source argument):
+    # #ucx[table-migrate:+1:0:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta
+    df = spark.catalog.createTable("old.things")
+    do_stuff_with(df)
+
+    ## Check a literal reference to an unknown table (that is not migrated); we expect no warning.
+    # TODO: Implement missing migration warning (on the source argument):
+    # #ucx[table-migrate:+1:0:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta
+    df = spark.catalog.createTable("table.we.know.nothing.about")
+    do_stuff_with(df)
+
+    ## Check that a call with too many positional arguments is ignored as (presumably) something else; we expect no warning.
+    # FIXME: This is a false positive due to an error in the matching specification; only 5 positional args are allowed.
+    # ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+    df = spark.catalog.createTable("old.things", None, None, None, None, "extra-argument")
+    do_stuff_with(df)
+
+    ## Check a call with an out-of-position named argument referencing a table known to be migrated.
+    # ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+    df = spark.catalog.createTable(path="foo", tableName="old.things", source="delta")
+    do_stuff_with(df)
+
+    ## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
+    # ucx[table-migrate:+1:0:+1:0] Can't migrate 'createTable' because its table name argument is not a constant
+    df = spark.catalog.createTable(name)
+    do_stuff_with(df)
+    # ucx[table-migrate:+1:0:+1:0] Can't migrate 'createTable' because its table name argument is not a constant
+    df = spark.catalog.createTable(f"boop{stuff}")
+    do_stuff_with(df)
+
+    ## Some trivial references to the method or table in unrelated contexts that should not trigger warnings.
+    # FIXME: This is a false positive; any method named 'createTable' is triggering the warning.
+    # ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+    something_else.createTable("old.things")
+    a_function("old.things")
@@ -0,0 +1,30 @@
+# ucx[direct-filesystem-access:+1:0:+1:0] The use of direct filesystem references is deprecated: s3://bucket/path
+spark.read.csv("s3://bucket/path")
+for i in range(10):
+
+    ## Check a literal reference to a known table that is migrated.
+    # ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+    table = spark.catalog.getTable("old.things")
+    do_stuff_with(table)
+
+    ## Check a literal reference to an unknown table (that is not migrated); we expect no warning.
+    table = spark.catalog.getTable("table.we.know.nothing.about")
+    do_stuff_with(table)
+
+    ## Check that a call with too many positional arguments is ignored as (presumably) something else; we expect no warning.
+    table = spark.catalog.getTable("old.things", "extra-argument")
+    do_stuff_with(table)
+
+    ## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
+    # ucx[table-migrate:+1:0:+1:0] Can't migrate 'getTable' because its table name argument is not a constant
+    table = spark.catalog.getTable(name)
+    do_stuff_with(table)
+    # ucx[table-migrate:+1:0:+1:0] Can't migrate 'getTable' because its table name argument is not a constant
+    table = spark.catalog.getTable(f"boop{stuff}")
+    do_stuff_with(table)
+
+    ## Some trivial references to the method or table in unrelated contexts that should not trigger warnings.
+    # FIXME: This is a false positive; any method named 'getTable' is triggering the warning.
+    # ucx[table-migrate:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog
+    something_else.getTable("old.things")
+    a_function("old.things")