Skip to content

Commit d8f78a4

Browse files
Improved HMS grants (#151)
Resolves #147
1 parent a1f31e6 commit d8f78a4

File tree

4 files changed

+141
-29
lines changed

4 files changed

+141
-29
lines changed

src/databricks/labs/ucx/tacl/grants.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,13 @@ def type_and_key(
3939
if database is not None:
4040
catalog = "hive_metastore" if catalog is None else catalog
4141
return "DATABASE", f"{catalog}.{database}"
42-
if catalog is not None:
43-
return "CATALOG", catalog
4442
if any_file:
4543
return "ANY FILE", ""
4644
if anonymous_function:
4745
return "ANONYMOUS FUNCTION", ""
46+
# Must come last, as it has lowest priority here but is a required parameter
47+
if catalog is not None:
48+
return "CATALOG", catalog
4849
msg = "invalid grant keys"
4950
raise ValueError(msg)
5051

@@ -69,7 +70,7 @@ def hive_grant_sql(self) -> str:
6970

7071
def hive_revoke_sql(self) -> str:
7172
object_type, object_key = self._this_type_and_key()
72-
return f"REVOKE {self.action_type} ON {object_type} {object_key} TO {self.principal}"
73+
return f"REVOKE {self.action_type} ON {object_type} {object_key} FROM {self.principal}"
7374

7475
def _set_owner(self, object_type, object_key):
7576
return f"ALTER {object_type} {object_key} OWNER TO {self.principal}"

tests/unit/mocks.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import logging
2+
import re
3+
from collections.abc import Iterator
4+
5+
from databricks.labs.ucx.tacl._internal import SqlBackend
6+
7+
logger = logging.getLogger(__name__)
8+
9+
10+
class MockBackend(SqlBackend):
11+
def __init__(self, *, fails_on_first: dict | None = None, rows: dict | None = None):
12+
self._fails_on_first = fails_on_first
13+
if not rows:
14+
rows = {}
15+
self._rows = rows
16+
self.queries = []
17+
18+
def _sql(self, sql):
19+
logger.debug(f"Mock backend.sql() received SQL: {sql}")
20+
seen_before = sql in self.queries
21+
self.queries.append(sql)
22+
if not seen_before and self._fails_on_first is not None:
23+
for match, failure in self._fails_on_first.items():
24+
if match in sql:
25+
raise RuntimeError(failure)
26+
27+
def execute(self, sql):
28+
self._sql(sql)
29+
30+
def fetch(self, sql) -> Iterator[any]:
31+
self._sql(sql)
32+
rows = []
33+
if self._rows:
34+
for pattern in self._rows.keys():
35+
logger.debug(f"Checking pattern: {pattern}")
36+
r = re.compile(pattern)
37+
if r.match(sql):
38+
logger.debug(f"Found match: {sql}")
39+
rows.extend(self._rows[pattern])
40+
logger.debug(f"Returning rows: {rows}")
41+
return iter(rows)

tests/unit/test_crawler_base.py

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,10 @@
1-
from collections.abc import Iterator
21
from dataclasses import dataclass
32

43
import pytest
54

6-
from databricks.labs.ucx.tacl._internal import CrawlerBase, SqlBackend
5+
from databricks.labs.ucx.tacl._internal import CrawlerBase
76

8-
9-
class MockBackend(SqlBackend):
10-
def __init__(self, *, fails_on_first: dict | None = None, rows: list[any] | None = None):
11-
self._fails_on_first = fails_on_first
12-
if not rows:
13-
rows = []
14-
self._rows = rows
15-
self.queries = []
16-
17-
def _sql(self, sql):
18-
seen_before = sql in self.queries
19-
self.queries.append(sql)
20-
if not seen_before and self._fails_on_first is not None:
21-
for match, failure in self._fails_on_first.items():
22-
if match in sql:
23-
raise RuntimeError(failure)
24-
25-
def execute(self, sql):
26-
self._sql(sql)
27-
28-
def fetch(self, sql) -> Iterator[any]:
29-
self._sql(sql)
30-
return self._rows
7+
from .mocks import MockBackend
318

329

3310
@dataclass

tests/unit/test_grants.py

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,65 @@
11
import pytest
22

3-
from databricks.labs.ucx.tacl.grants import Grant
3+
from databricks.labs.ucx.providers.mixins.sql import Row
4+
from databricks.labs.ucx.tacl.grants import Grant, GrantsCrawler
5+
from databricks.labs.ucx.tacl.tables import TablesCrawler
6+
7+
from .mocks import MockBackend
48

59

610
def test_type_and_key_table():
711
grant = Grant.type_and_key(catalog="hive_metastore", database="mydb", table="mytable")
812
assert grant == ("TABLE", "hive_metastore.mydb.mytable")
913

14+
grant = Grant(principal="user", action_type="SELECT", catalog="hive_metastore", database="mydb", table="mytable")
15+
assert grant._this_type_and_key()[0] == "TABLE"
16+
assert grant.object_key == "hive_metastore.mydb.mytable"
17+
1018

1119
def test_type_and_key_view():
1220
grant = Grant.type_and_key(catalog="hive_metastore", database="mydb", view="myview")
1321
assert grant == ("VIEW", "hive_metastore.mydb.myview")
1422

23+
grant = Grant(principal="user", action_type="SELECT", catalog="hive_metastore", database="mydb", view="myview")
24+
assert grant._this_type_and_key()[0] == "VIEW"
25+
assert grant.object_key == "hive_metastore.mydb.myview"
26+
1527

1628
def test_type_and_key_database():
1729
grant = Grant.type_and_key(catalog="hive_metastore", database="mydb")
1830
assert grant == ("DATABASE", "hive_metastore.mydb")
1931

32+
grant = Grant(principal="user", action_type="SELECT", catalog="hive_metastore", database="mydb")
33+
assert grant._this_type_and_key()[0] == "DATABASE"
34+
assert grant.object_key == "hive_metastore.mydb"
35+
2036

2137
def test_type_and_key_catalog():
2238
grant = Grant.type_and_key(catalog="mycatalog")
2339
assert grant == ("CATALOG", "mycatalog")
2440

41+
grant = Grant(principal="user", action_type="SELECT", catalog="mycatalog")
42+
assert grant._this_type_and_key()[0] == "CATALOG"
43+
assert grant.object_key == "mycatalog"
44+
2545

2646
def test_type_and_key_any_file():
2747
grant = Grant.type_and_key(any_file=True)
2848
assert grant == ("ANY FILE", "")
2949

50+
grant = Grant(principal="user", action_type="SELECT", catalog="hive_metastore", any_file=True)
51+
assert grant._this_type_and_key()[0] == "ANY FILE"
52+
assert grant.object_key == ""
53+
3054

3155
def test_type_and_key_anonymous_function():
3256
grant = Grant.type_and_key(anonymous_function=True)
3357
assert grant == ("ANONYMOUS FUNCTION", "")
3458

59+
grant = Grant(principal="user", action_type="SELECT", catalog="hive_metastore", anonymous_function=True)
60+
assert grant._this_type_and_key()[0] == "ANONYMOUS FUNCTION"
61+
assert grant.object_key == ""
62+
3563

3664
def test_type_and_key_invalid():
3765
with pytest.raises(ValueError):
@@ -41,6 +69,7 @@ def test_type_and_key_invalid():
4169
def test_hive_sql():
4270
grant = Grant(principal="user", action_type="SELECT", catalog="hive_metastore", database="mydb", table="mytable")
4371
assert grant.hive_grant_sql() == "GRANT SELECT ON TABLE hive_metastore.mydb.mytable TO user"
72+
assert grant.hive_revoke_sql() == "REVOKE SELECT ON TABLE hive_metastore.mydb.mytable FROM user"
4473

4574

4675
@pytest.mark.parametrize(
@@ -58,7 +87,71 @@ def test_hive_sql():
5887
Grant("me", "USAGE", catalog="hive_metastore", database="mydb"),
5988
"GRANT USE SCHEMA ON DATABASE hive_metastore.mydb TO me",
6089
),
90+
(
91+
Grant("me", "INVALID", catalog="hive_metastore", database="mydb"),
92+
None,
93+
),
6194
],
6295
)
6396
def test_uc_sql(grant, query):
6497
assert grant.uc_grant_sql() == query
98+
99+
100+
def make_row(data, columns):
101+
row = Row(data)
102+
row.__columns__ = columns
103+
return row
104+
105+
106+
SELECT_COLS = ["catalog", "database", "table", "object_type", "table_format", "location", "view_text"]
107+
SHOW_COLS = ["principal", "action_type", "object_type", "ignored"]
108+
DESCRIBE_COLS = ["key", "value", "ignored"]
109+
ROWS = {
110+
"SELECT.*": [
111+
make_row(("foo", "bar", "test_table", "type", "DELTA", "/foo/bar/test", None), SELECT_COLS),
112+
make_row(("foo", "bar", "test_view", "type", "VIEW", None, "SELECT * FROM table"), SELECT_COLS),
113+
make_row(("foo", None, None, "type", "CATALOG", None, None), SELECT_COLS),
114+
],
115+
"SHOW.*": [
116+
make_row(("princ1", "SELECT", "TABLE", "ignored"), SHOW_COLS),
117+
make_row(("princ1", "SELECT", "VIEW", "ignored"), SHOW_COLS),
118+
make_row(("princ1", "USE", "CATALOG$", "ignored"), SHOW_COLS),
119+
],
120+
"DESCRIBE.*": [
121+
make_row(("Catalog", "foo", "ignored"), DESCRIBE_COLS),
122+
make_row(("Type", "TABLE", "ignored"), DESCRIBE_COLS),
123+
make_row(("Provider", "", "ignored"), DESCRIBE_COLS),
124+
make_row(("Location", "/foo/bar/test", "ignored"), DESCRIBE_COLS),
125+
make_row(("View Text", "SELECT * FROM table", "ignored"), DESCRIBE_COLS),
126+
],
127+
}
128+
129+
130+
def test_crawler_crawl():
131+
# Test with no data
132+
b = MockBackend()
133+
table = TablesCrawler(b, "hive_metastore", "schema")
134+
crawler = GrantsCrawler(table)
135+
grants = crawler._crawl("hive_metastore", "schema")
136+
assert len(grants) == 0
137+
# Test with test data
138+
b = MockBackend(rows=ROWS)
139+
table = TablesCrawler(b, "hive_metastore", "schema")
140+
crawler = GrantsCrawler(table)
141+
grants = crawler._crawl("hive_metastore", "schema")
142+
assert len(grants) == 3
143+
144+
145+
def test_crawler_snapshot():
146+
# Test with no data
147+
b = MockBackend()
148+
table = TablesCrawler(b, "hive_metastore", "schema")
149+
crawler = GrantsCrawler(table)
150+
snapshot = crawler.snapshot("hive_metastore", "schema")
151+
assert len(snapshot) == 0
152+
# Test with test data
153+
b = MockBackend(rows=ROWS)
154+
table = TablesCrawler(b, "hive_metastore", "schema")
155+
crawler = GrantsCrawler(table)
156+
snapshot = crawler.snapshot("hive_metastore", "schema")
157+
assert len(snapshot) == 3

0 commit comments

Comments
 (0)