Skip to content

Commit 3753c87

Browse files
Copilotjoocer
andcommitted
Add permission controls for protocol prefixes (file:, gs:, s3:)
Co-authored-by: joocer <[email protected]>
1 parent 5e993f1 commit 3753c87

File tree

3 files changed

+225
-0
lines changed

3 files changed

+225
-0
lines changed

testdata/PERMISSIONS_README.md

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Protocol Prefix Permissions
2+
3+
This directory contains example permission configurations for controlling access to different data sources in Opteryx.
4+
5+
## permissions.json Format
6+
7+
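The `permissions.json` file uses the JSON Lines format: it contains one JSON object per line (the file as a whole is not a single JSON array or document), and each object defines one permission rule: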
8+
9+
```json
10+
{"role":"role_name", "permission": "READ", "table": "pattern"}
11+
```
12+
13+
- **role**: The name of the role that has this permission
14+
- **permission**: The type of permission (currently only "READ" is supported)
15+
- **table**: A pattern (supporting wildcards) that matches table names
16+
17+
## Protocol Prefix Permissions
18+
19+
Starting with the wildcard support for cloud storage paths, you can now control access to different storage protocols using permission patterns:
20+
21+
### File System Access
22+
```json
23+
{"role":"file_access", "permission": "READ", "table": "file://*"}
24+
```
25+
Grants read access to all local file system paths using the `file://` protocol.
26+
27+
### Google Cloud Storage Access
28+
```json
29+
{"role":"gcs_access", "permission": "READ", "table": "gs://*"}
30+
```
31+
Grants read access to all Google Cloud Storage paths using the `gs://` protocol.
32+
33+
### Amazon S3 Access
34+
```json
35+
{"role":"s3_access", "permission": "READ", "table": "s3://*"}
36+
```
37+
Grants read access to all Amazon S3 paths using the `s3://` protocol.
38+
39+
## Examples
40+
41+
### Restrict Access to Specific Protocols
42+
43+
A user with only the `restricted` role can read tables in the `opteryx.*` namespace and nothing else — in particular, no `file://`, `gs://`, or `s3://` paths:
44+
```json
45+
{"role":"restricted", "permission": "READ", "table": "opteryx.*"}
46+
```
47+
48+
### Grant Multi-Protocol Access
49+
50+
A user can have multiple roles to access different protocols:
51+
- Role `file_access` + role `gcs_access` → can access both `file://` and `gs://` paths
52+
- Role `restricted` + role `s3_access` → can access `opteryx.*` tables and `s3://` paths
53+
54+
### Default Access
55+
56+
The system includes a default role `opteryx` that has access to everything:
57+
```json
58+
{"role":"opteryx", "permission": "READ", "table": "*"}
59+
```
60+
61+
## Usage in Queries
62+
63+
When you query using protocol prefixes, the permission system checks the full table name, including the protocol prefix (for example `gs://my-bucket/data/*.parquet`):
64+
65+
```sql
66+
-- Requires 'gcs_access' role or 'opteryx' role
67+
SELECT * FROM gs://my-bucket/data/*.parquet
68+
69+
-- Requires 's3_access' role or 'opteryx' role
70+
SELECT * FROM s3://my-bucket/logs/2024-01-??.csv
71+
72+
-- Requires 'file_access' role or 'opteryx' role
73+
SELECT * FROM file://path/to/data/*.csv
74+
75+
-- Requires 'restricted' role or 'opteryx' role
76+
SELECT * FROM opteryx.space_missions
77+
```
78+
79+
## Security Best Practices
80+
81+
1. **Least Privilege**: Only grant the minimum permissions needed for each role
82+
2. **Separate Roles**: Create separate roles for different data sources (file, GCS, S3, databases)
83+
3. **Monitor Access**: Log and review which roles access which data sources
84+
4. **Audit Regularly**: Review and update permissions as access requirements change
85+
86+
## Testing
87+
88+
See `tests/unit/security/test_protocol_permissions.py` for comprehensive tests of the protocol prefix permission system.

testdata/permissions.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
11
{"role":"restricted", "permission": "READ", "table": "opteryx.*"}
2+
{"role":"file_access", "permission": "READ", "table": "file://*"}
3+
{"role":"gcs_access", "permission": "READ", "table": "gs://*"}
4+
{"role":"s3_access", "permission": "READ", "table": "s3://*"}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
"""
2+
Test permissions for protocol prefixes (file:, gs:, s3:)
3+
"""
4+
5+
import os
6+
import sys
7+
8+
sys.path.insert(1, os.path.join(sys.path[0], "../.."))
9+
10+
import pytest
11+
12+
from opteryx.managers.permissions import can_read_table
13+
14+
# Test cases for protocol prefix permissions
#
# Each entry is a 4-tuple:
#   (roles, table, expected, description)
#     roles       - list of role names passed to can_read_table
#     table       - table name / path being checked, including any protocol prefix
#     expected    - True when read access should be granted, False otherwise
#     description - human-readable label used in assertion and runner output
test_cases = [
    # File protocol tests
    (["file_access"], "file://path/to/file.parquet", True, "file_access can read file://"),
    (["file_access"], "file://path/to/*.parquet", True, "file_access can read file:// with wildcards"),
    (["file_access"], "gs://bucket/path/file.parquet", False, "file_access cannot read gs://"),
    (["file_access"], "s3://bucket/path/file.parquet", False, "file_access cannot read s3://"),
    (["file_access"], "opteryx.table", False, "file_access cannot read regular tables"),

    # GCS protocol tests
    (["gcs_access"], "gs://bucket/path/file.parquet", True, "gcs_access can read gs://"),
    (["gcs_access"], "gs://bucket/path/*.parquet", True, "gcs_access can read gs:// with wildcards"),
    (["gcs_access"], "gs://bucket/data/file[0-9].csv", True, "gcs_access can read gs:// with range wildcards"),
    (["gcs_access"], "file://path/to/file.parquet", False, "gcs_access cannot read file://"),
    (["gcs_access"], "s3://bucket/path/file.parquet", False, "gcs_access cannot read s3://"),
    (["gcs_access"], "opteryx.table", False, "gcs_access cannot read regular tables"),

    # S3 protocol tests
    (["s3_access"], "s3://bucket/path/file.parquet", True, "s3_access can read s3://"),
    (["s3_access"], "s3://bucket/path/*.parquet", True, "s3_access can read s3:// with wildcards"),
    (["s3_access"], "s3://bucket/logs/2024-01-??.csv", True, "s3_access can read s3:// with ? wildcards"),
    (["s3_access"], "file://path/to/file.parquet", False, "s3_access cannot read file://"),
    (["s3_access"], "gs://bucket/path/file.parquet", False, "s3_access cannot read gs://"),
    (["s3_access"], "opteryx.table", False, "s3_access cannot read regular tables"),

    # Multiple roles tests
    (["file_access", "gcs_access"], "file://path/file.parquet", True, "multiple roles allow file://"),
    (["file_access", "gcs_access"], "gs://bucket/file.parquet", True, "multiple roles allow gs://"),
    (["file_access", "gcs_access"], "s3://bucket/file.parquet", False, "multiple roles without s3_access deny s3://"),
    (["file_access", "gcs_access", "s3_access"], "s3://bucket/file.parquet", True, "all protocol roles allow s3://"),

    # Restricted role tests (only has access to opteryx.*)
    (["restricted"], "file://path/to/file.parquet", False, "restricted cannot read file://"),
    (["restricted"], "gs://bucket/path/file.parquet", False, "restricted cannot read gs://"),
    (["restricted"], "s3://bucket/path/file.parquet", False, "restricted cannot read s3://"),
    (["restricted"], "opteryx.space_missions", True, "restricted can read opteryx.*"),
    (["restricted"], "opteryx.schema.table", True, "restricted can read nested opteryx paths"),

    # Opteryx role tests (default role with access to everything)
    (["opteryx"], "file://path/to/file.parquet", True, "opteryx role can read file://"),
    (["opteryx"], "gs://bucket/path/file.parquet", True, "opteryx role can read gs://"),
    (["opteryx"], "s3://bucket/path/file.parquet", True, "opteryx role can read s3://"),
    (["opteryx"], "any.table.name", True, "opteryx role can read any table"),

    # Combined restricted + protocol access
    (["restricted", "file_access"], "file://path/file.parquet", True, "restricted+file_access can read file://"),
    (["restricted", "file_access"], "opteryx.table", True, "restricted+file_access can read opteryx.*"),
    (["restricted", "file_access"], "gs://bucket/file.parquet", False, "restricted+file_access cannot read gs://"),

    # No roles
    ([], "file://path/to/file.parquet", False, "no roles cannot read file://"),
    ([], "gs://bucket/path/file.parquet", False, "no roles cannot read gs://"),
    ([], "s3://bucket/path/file.parquet", False, "no roles cannot read s3://"),
    ([], "opteryx.table", False, "no roles cannot read any table"),

    # Edge cases with protocol-like table names
    (["restricted"], "file_like.table", False, "restricted cannot read file_like table without proper prefix"),
    (["restricted"], "gs_data.table", False, "restricted cannot read gs_data table without proper prefix"),
    (["file_access"], "file_data.table", False, "file_access only matches file:// protocol"),

    # Wildcard paths with protocol prefixes
    (["gcs_access"], "gs://bucket/*/data.parquet", True, "gcs_access can read gs:// with wildcard in middle"),
    (["s3_access"], "s3://bucket/path/file[0-9].parquet", True, "s3_access can read s3:// with range wildcards"),
    (["file_access"], "file://data/*.csv", True, "file_access can read file:// with wildcards"),
]
79+
80+
81+
@pytest.mark.parametrize("roles, table, expected, description", test_cases)
def test_protocol_prefix_permissions(roles, table, expected, description):
    """
    Check a single (roles, table) pair against ``can_read_table``.

    Parameters:
        roles: the role names handed to ``can_read_table``
        table: the table name or protocol-prefixed path being checked
        expected: the outcome ``can_read_table`` must produce for this pair
        description: label included in the failure message
    """
    outcome = can_read_table(roles, table)
    failure_message = f"{description}: expected {expected}, got {outcome}"
    assert outcome == expected, failure_message
86+
87+
88+
if __name__ == "__main__":  # pragma: no cover
    # Stand-alone runner: executes every entry of ``test_cases`` without pytest,
    # printing one coloured, timed line per case followed by a summary.
    import time

    start_suite = time.monotonic_ns()
    passed = 0
    failed = 0
    # One (description, roles, table, expected, error) entry per failing case,
    # so the FAILURES section at the end has something to list.
    failures = []

    print(f"RUNNING BATTERY OF {len(test_cases)} PROTOCOL PREFIX PERMISSION TESTS")
    for index, (roles, table, expected, description) in enumerate(test_cases):
        print(
            f"\033[38;2;255;184;108m{(index + 1):04}\033[0m"
            f" {', '.join(roles) if roles else 'no roles':35.35} {table:30.30}",
            end="",
            flush=True,
        )
        # Start the clock before entering ``try`` so the ``except`` branch can
        # always compute an elapsed time.
        start = time.monotonic_ns()
        try:
            test_protocol_prefix_permissions(roles, table, expected, description)
            print(
                f"\033[38;2;26;185;67m{str(int((time.monotonic_ns() - start)/1e6)).rjust(4)}ms\033[0m ✅",
                end="",
            )
            passed += 1
            # A trailing red asterisk marks every passing line printed after
            # the first failure, making an earlier failure easy to spot.
            if failed > 0:
                print(" \033[0;31m*\033[0m")
            else:
                print()
        except Exception as err:
            # Deliberately broad: this is the runner's top-level boundary and a
            # failing case (AssertionError or otherwise) must not stop the battery.
            print(f"\033[0;31m{str(int((time.monotonic_ns() - start)/1e6)).rjust(4)}ms ❌ *\033[0m")
            print(f"> {description}")
            print(f"> Roles: {roles}, Table: {table}, Expected: {expected}")
            print(f"> Error: {err}")
            failed += 1
            failures.append((description, roles, table, expected, err))

    print("--- ✅ \033[0;32mdone\033[0m")

    if failed > 0:
        print("\n\033[38;2;139;233;253m\033[3mFAILURES\033[0m")
        # Recap every failing case so they can be read without scrolling back.
        for description, roles, table, expected, err in failures:
            print(f"> {description}")
            print(f">   Roles: {roles}, Table: {table}, Expected: {expected}")
            print(f">   Error: {err}")

    print(
        f"\n\033[38;2;139;233;253m\033[3mCOMPLETE\033[0m ({((time.monotonic_ns() - start_suite) / 1e9):.2f} seconds)\n"
        f" \033[38;2;26;185;67m{passed} passed ({(passed * 100) // (passed + failed) if (passed + failed) > 0 else 0}%)\033[0m\n"
        f" \033[38;2;255;121;198m{failed} failed\033[0m"
    )

0 commit comments

Comments
 (0)