Skip to content

Commit 4d142e3

Browse files
Add server validation against whitelist (#93)
* Add Whitelist validation by reading the whitelist file on origin/main
1 parent d0107d1 commit 4d142e3

File tree

17 files changed

+363
-27
lines changed

17 files changed

+363
-27
lines changed

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,13 @@ addopts = """
6666
"""
6767
markers = """
6868
requires_local_server: mark a test which requires locally hosting the config server
69+
use_threading: mark a test which will kickoff the background thread in WhitelistFetcher
6970
"""
7071

7172
# https://iscinumpy.gitlab.io/post/bound-version-constraints/#watch-for-warnings
72-
filterwarnings = "error"
73+
filterwarnings = ["error",
74+
"ignore:Exception ignored in.*sqlite3.Connection:pytest.PytestUnraisableExceptionWarning"] #See https://github.com/pytest-dev/pytest-cov/issues/694
75+
7376
# Doctest python code in docs, python code in src docstrings, test functions in tests
7477
testpaths = "docs src tests"
7578

src/daq_config_server/app.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
from fastapi.responses import JSONResponse, Response
1010
from starlette import status
1111

12-
from daq_config_server.constants import ENDPOINTS
12+
from daq_config_server.constants import (
13+
ENDPOINTS,
14+
)
15+
from daq_config_server.whitelist import get_whitelist
1316

1417
app = FastAPI(
1518
title="DAQ config server",
@@ -33,6 +36,13 @@ class ValidAcceptHeaders(StrEnum):
3336
RAW_BYTES = "application/octet-stream"
3437

3538

39+
def path_is_whitelisted(file_path: Path) -> bool:
40+
whitelist = get_whitelist()
41+
return file_path in whitelist.whitelist_files or any(
42+
file_path.is_relative_to(dir) for dir in whitelist.whitelist_dirs
43+
)
44+
45+
3646
@app.get(
3747
ENDPOINTS.CONFIG + "/{file_path:path}",
3848
responses={
@@ -69,7 +79,24 @@ def get_configuration(
6979
):
7080
"""
7181
Read a file and return its contents in a format specified by the accept header.
82+
83+
Check the file against the whitelist on the current main branch on GitHub.
7284
"""
85+
86+
if not file_path.is_absolute():
87+
raise HTTPException(
88+
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
89+
detail=(f"Requested filepath {file_path} must be an absolute path"),
90+
)
91+
92+
if not path_is_whitelisted(file_path):
93+
raise HTTPException(
94+
status_code=status.HTTP_403_FORBIDDEN,
95+
detail=f"{file_path} is not a whitelisted file. Please make sure it \
96+
exists in https://raw.githubusercontent.com/DiamondLightSource/\
97+
daq-config-server/refs/heads/main/whitelist.yaml",
98+
)
99+
73100
if not file_path.is_file():
74101
raise HTTPException(
75102
status_code=status.HTTP_404_NOT_FOUND,

src/daq_config_server/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,7 @@
55
class ENDPOINTS:
66
CONFIG = "/config"
77
HEALTH = "/healthz"
8+
9+
10+
WHITELIST_REFRESH_RATE_S = 300
11+
WHITELIST_URL = "https://raw.githubusercontent.com/DiamondLightSource/daq-config-server/refs/heads/main/whitelist.yaml"

src/daq_config_server/log.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import logging
2+
3+
4+
# See https://github.com/DiamondLightSource/daq-config-server/issues/73
5+
# for making the logging configurable
6+
def get_default_logger(name: str = __name__) -> logging.Logger:
7+
logger = logging.getLogger(name)
8+
logger.setLevel(logging.INFO)
9+
10+
# Prevent adding handlers multiple times
11+
if not logger.handlers:
12+
console_handler = logging.StreamHandler()
13+
formatter = logging.Formatter(
14+
"%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
15+
)
16+
console_handler.setFormatter(formatter)
17+
logger.addHandler(console_handler)
18+
19+
return logger
20+
21+
22+
# For now use a basic console-writing logger. Integrate properly with kubernetes in the
23+
# future
24+
LOGGER = get_default_logger("daq-config-server")
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from ._utils import make_test_response
2+
3+
__all__ = ["make_test_response"]
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from unittest.mock import MagicMock
2+
3+
from httpx import Response
4+
from requests import RequestException
5+
6+
from daq_config_server.app import ValidAcceptHeaders
7+
8+
9+
def make_test_response(
10+
content: str,
11+
status_code: int = 200,
12+
raise_exc: type[RequestException] | None = None,
13+
json_value: str | None = None,
14+
content_type: ValidAcceptHeaders = ValidAcceptHeaders.PLAIN_TEXT,
15+
):
16+
r = Response(
17+
json=json_value,
18+
status_code=status_code,
19+
headers={"content-type": content_type},
20+
content=content,
21+
)
22+
r.raise_for_status = MagicMock()
23+
24+
if raise_exc:
25+
r.raise_for_status.side_effect = raise_exc
26+
else:
27+
r.raise_for_status.return_value = None
28+
return r

src/daq_config_server/whitelist.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import atexit
2+
import time
3+
from functools import cache
4+
from pathlib import Path
5+
from threading import Event, Thread
6+
7+
import requests
8+
import yaml
9+
10+
from daq_config_server.constants import (
11+
WHITELIST_REFRESH_RATE_S,
12+
WHITELIST_URL,
13+
)
14+
from daq_config_server.log import LOGGER
15+
16+
17+
class WhitelistFetcher:
18+
"""Read the whitelist from the main branch of this repo from github, and check for
19+
updates every 5 minutes. This lets the deployed server see updates to the whitelist
20+
without requiring a new release or a restart"""
21+
22+
def __init__(self):
23+
self._initial_load()
24+
self._stop = Event()
25+
self.update_in_background_thread = Thread(
26+
target=self._periodically_update_whitelist, daemon=True
27+
)
28+
self.update_in_background_thread.start()
29+
30+
def _fetch_and_update(self):
31+
response = requests.get(WHITELIST_URL)
32+
response.raise_for_status()
33+
data = yaml.safe_load(response.text)
34+
self.whitelist_files = {Path(p) for p in data.get("whitelist_files")}
35+
self.whitelist_dirs = {Path(p) for p in data.get("whitelist_dirs")}
36+
37+
def _initial_load(self):
38+
try:
39+
self._fetch_and_update()
40+
LOGGER.info("Successfully read whitelist from GitHub.")
41+
except Exception as e:
42+
LOGGER.error(f"Initial whitelist load failed: {e}")
43+
raise RuntimeError("Failed to load whitelist during initialization.") from e
44+
45+
def _periodically_update_whitelist(self):
46+
while not self._stop.is_set():
47+
time.sleep(WHITELIST_REFRESH_RATE_S)
48+
try:
49+
self._fetch_and_update()
50+
LOGGER.info("Whitelist updated successfully.")
51+
except Exception as e:
52+
LOGGER.error(f"Failed to update whitelist: {e}")
53+
54+
def stop(self):
55+
self._stop.set()
56+
self.update_in_background_thread.join(timeout=1)
57+
58+
59+
@cache
60+
def get_whitelist() -> WhitelistFetcher:
61+
fetcher = WhitelistFetcher()
62+
atexit.register(fetcher.stop)
63+
return fetcher

tests/constants.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,24 @@
77
TEST_BAD_JSON_PATH = TEST_DATA_DIR_PATH.joinpath("test_bad_json")
88

99
TEST_GOOD_JSON_PATH = TEST_DATA_DIR_PATH.joinpath("test_good_json.json")
10+
11+
TEST_FILE_NOT_ON_WHITELIST_PATH = TEST_DATA_DIR_PATH.joinpath(
12+
"test_file_not_on_whitelist.json"
13+
)
14+
15+
TEST_FILE_IN_GOOD_DIR = TEST_DATA_DIR_PATH.joinpath("good_dir/test_file")
16+
17+
TEST_FILE_IN_BAD_DIR = TEST_DATA_DIR_PATH.joinpath("bad_dir/test_file")
18+
19+
TEST_INVALID_FILE_PATH = TEST_DATA_DIR_PATH.joinpath("invalid_file")
20+
21+
TEST_WHITELIST_RESPONSE = f"""\
22+
whitelist_files:
23+
- {TEST_GOOD_JSON_PATH}
24+
- {TEST_BAD_JSON_PATH}
25+
- {TEST_BEAMLINE_PARAMETERS_PATH}
26+
- {TEST_INVALID_FILE_PATH}
27+
28+
whitelist_dirs:
29+
- {TEST_DATA_DIR_PATH.joinpath("good_dir")}
30+
"""

tests/system_tests/test_client.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,27 @@ def test_bad_json_gives_http_error_with_details(server: ConfigServer):
9090
dict[Any, Any],
9191
)
9292
server._log.error.assert_called_once_with(expected_detail)
93+
94+
95+
# See https://github.com/DiamondLightSource/daq-config-server/issues/96 for writing
96+
# these tests
97+
98+
99+
@pytest.mark.xfail
100+
@pytest.mark.requires_local_server
101+
def test_request_with_file_not_on_whitelist(): ...
102+
103+
104+
@pytest.mark.xfail
105+
@pytest.mark.requires_local_server
106+
def test_request_with_file_on_whitelist(): ...
107+
108+
109+
@pytest.mark.xfail
110+
@pytest.mark.requires_local_server
111+
def test_request_with_file_in_whitelist_dirs(): ...
112+
113+
114+
@pytest.mark.xfail
115+
@pytest.mark.requires_local_server
116+
def test_request_with_file_not_in_whitelist_dirs(): ...

tests/test_data/bad_dir/test_file

Whitespace-only changes.

0 commit comments

Comments
 (0)