|
| 1 | +import logging |
1 | 2 | import os |
| 3 | +import re |
| 4 | +import string as mstring |
2 | 5 |
|
| 6 | +import pytest |
3 | 7 | import requests |
4 | 8 |
|
5 | | -from cads_adaptors.adaptors import mars |
| 9 | +from cads_adaptors.adaptors import Context, mars, multi |
| 10 | +from cads_adaptors.exceptions import InvalidRequest |
6 | 11 |
|
# URL of a sample GRIB file
TEST_GRIB_FILE = "https://sites.ecmwf.int/repository/earthkit-data/test-data/era5-levels-members.grib"
logger = logging.getLogger(__name__)

# Space and tab are handled separately from the other whitespace characters
WHITESPACE_CHARS = {" ", "\t"}
# Every character with a code point from 0 to 255
EXTENDED_ASCII_CHARS = {chr(code) for code in range(256)}

# Valid key characters: everything the regex treats as non-whitespace, plus
# the plain space
VALID_KEY_CHARS = (
    {char for char in EXTENDED_ASCII_CHARS if re.match(r"\S", char)}
    .difference(mstring.whitespace)
    .union(" ")
)
# Invalid key characters: ASCII whitespace other than space and tab
INVALID_KEY_CHARS = set(mstring.whitespace).difference(WHITESPACE_CHARS)
# The value character sets are built exactly like the key ones
VALID_VALUE_CHARS = (
    {char for char in EXTENDED_ASCII_CHARS if re.match(r"\S", char)}
    .difference(mstring.whitespace)
    .union(" ")
)
INVALID_VALUE_CHARS = set(mstring.whitespace).difference(WHITESPACE_CHARS)
8 | 26 |
|
9 | 27 |
|
10 | 28 | def test_get_mars_servers(): |
@@ -63,3 +81,163 @@ def test_convert_format(tmp_path, monkeypatch): |
63 | 81 | _, out_ext = os.path.splitext(converted_files[0]) |
64 | 82 | assert out_ext == ".nc" |
65 | 83 | assert "/test_subdir/" in converted_files[0] |
| 84 | + |
| 85 | + |
def test_schema_null():
    """Check that empty or blank request inputs are rejected by the schema."""
    # A request that is not a dict at all
    _check_schema_fail("", "request: '' is not of type 'dict'")

    # A dict with no entries
    _check_schema_fail({}, "request: {} should be non-empty")

    # Keys and values that are empty or a single space/tab
    blanks = [""]
    blanks.extend(sorted(WHITESPACE_CHARS))
    for blank in blanks:
        # The key message is compared against the escaped form (e.g. \t),
        # the value message against the raw string
        escaped = repr(blank).strip("'")
        bad_key_msg = f"request: '{escaped}' is an invalid key name"
        bad_value_msg = f"request['param'][0]: invalid value: '{blank}'"

        # Blank key
        _check_schema_fail({blank: "1"}, bad_key_msg)

        # Blank value
        _check_schema_fail({"param": blank}, bad_value_msg)
| 107 | + |
| 108 | + |
def test_schema_whitespace():
    """Check how a space or tab inside a key or value is treated."""
    for char in sorted(WHITESPACE_CHARS):
        # Insert the character before, between and after the letters of "ab"
        for index in range(3):
            text = "ab"[:index] + char + "ab"[index:]
            escaped = repr(text).strip("'")

            # The only rejected combination is a tab in the middle of the
            # string; spaces anywhere and leading/trailing tabs are accepted
            if char == "\t" and index == 1:
                _check_schema_fail(
                    {text: "1"}, f"request: '{escaped}' is an invalid key name"
                )
                _check_schema_fail(
                    {"param": text}, f"request['param'][0]: invalid value: '{text}'"
                )
                continue

            _check_schema_pass({text: "1"}, {text: ["1"]})
            _check_schema_pass({"param": text}, {"param": [text]})
| 130 | + |
| 131 | + |
def test_schema_invalid_key_chars():
    """Check that keys containing an invalid character are rejected."""
    for char in sorted(INVALID_KEY_CHARS):
        # Insert the character before, between and after the letters of "ab"
        for index in range(3):
            key = "ab"[:index] + char + "ab"[index:]
            # The expected message uses the escaped form of the key, without
            # the surrounding quotes that repr() adds
            escaped = repr(key)[1:-1]
            expected_msg = f"request: '{escaped}' is an invalid key name"

            # With no schema options the key must be refused
            _check_schema_fail({key: "1"}, expected_msg)

            # A key_regex matching exactly this key makes it acceptable
            _check_schema_pass({key: "1"}, {key: ["1"]}, key_regex=re.escape(key))
| 150 | + |
| 151 | + |
def test_schema_invalid_value_chars():
    """Check that values containing an invalid character are rejected."""
    for char in sorted(INVALID_VALUE_CHARS):
        # Insert the character before, between and after the letters of "ab"
        for index in range(3):
            value = "ab"[:index] + char + "ab"[index:]
            expected_msg = f"request['a'][0]: invalid value: '{value}'"

            # With no schema options the value must be refused
            _check_schema_fail({"a": value}, expected_msg)

            # A value_regex matching exactly this value makes it acceptable
            _check_schema_pass(
                {"a": value}, {"a": [value]}, value_regex=re.escape(value)
            )
| 169 | + |
| 170 | + |
def test_good_requests():
    """Check that a range of ordinary-looking requests pass the schema."""
    # One key and one value built from every valid character
    every_key_char = "".join(sorted(VALID_KEY_CHARS))
    every_value_char = "".join(sorted(VALID_VALUE_CHARS))

    # (request as submitted, request expected after normalisation)
    cases = [
        ({"a": 1}, {"a": ["1"]}),
        ({"A": "a"}, {"A": ["a"]}),
        ({"0": ["a"]}, {"0": ["a"]}),
        ({"_": 1}, {"_": ["1"]}),
        (
            {" abc ": [3, 2, 1, "foo-bar"], "\txyz\t\t": "3/2/1/foo-bar"},
            {" abc ": ["3", "2", "1", "foo-bar"], "\txyz\t\t": ["3/2/1/foo-bar"]},
        ),
        (
            {"step": "1/to/24/by/3", "param_FOO": ["152.128", "203.210"]},
            {"step": ["1/to/24/by/3"], "param_FOO": ["152.128", "203.210"]},
        ),
        (
            {"area": [10, -10.0, -20.1, 10.1]},
            {"area": ["10", "-10.0", "-20.1", "10.1"]},
        ),
        ({"area": "10/-10./-20.1/10.1"}, {"area": ["10/-10./-20.1/10.1"]}),
        (
            {"x": ["1E+10", "-1.E-10", ".1E0", "-.1E0", "12.13e45", "-12.13.e-45"]},
            {"x": ["1E+10", "-1.E-10", ".1E0", "-.1E0", "12.13e45", "-12.13.e-45"]},
        ),
        ({every_key_char: every_value_char}, {every_key_char: [every_value_char]}),
    ]
    for submitted, expected in cases:
        _check_schema_pass(submitted, expected)
| 196 | + |
| 197 | + |
def test_schema_duplicates():
    """Check how repeated entries in a value list are handled."""
    # Repeats are accepted for area and grid, including the mixed-case
    # spelling "GriD"
    tolerated = [
        ({"area": [1, 1]}, {"area": ["1", "1"]}),
        ({"grid": ["1", "1"]}, {"grid": ["1", "1"]}),
        ({"GriD": [1, 1]}, {"GriD": ["1", "1"]}),
    ]
    for submitted, expected in tolerated:
        _check_schema_pass(submitted, expected)

    # Any other key is rejected when it has repeats
    _check_schema_fail(
        {"param": [1, 1]},
        "request['param']: has repeated values in the list, e.g. '1'",
    )

    # ... unless that key is listed in allow_duplicate_values_keys
    _check_schema_pass(
        {"param": [1, 1]},
        {"param": ["1", "1"]},
        allow_duplicate_values_keys=["param"],
    )

    # ... or remove_duplicate_values is set, which drops the repeats
    _check_schema_pass(
        {"param": [1, 1]},
        {"param": ["1"]},
        remove_duplicate_values=True,
    )
| 219 | + |
| 220 | + |
def _check_schema_fail(request, error_msg):
    """Check a request fails the schema with the expected error message.

    Each of ``mars.MarsCdsAdaptor`` and ``multi.MultiMarsCdsAdaptor`` is built
    without schema options; ``normalise_request(request)`` must raise
    ``InvalidRequest`` whose first argument equals ``error_msg``.
    """
    for cls in [mars.MarsCdsAdaptor, multi.MultiMarsCdsAdaptor]:
        adp = cls(form=None, context=Context(logger=logger))

        # Only the call under test goes inside the raises block, so an
        # unexpected success is reported by pytest as "DID NOT RAISE" rather
        # than being masked by an unrelated assertion on the return value
        with pytest.raises(InvalidRequest) as einfo:
            adp.normalise_request(request)

        actual_msg = einfo.value.args[0]
        assert actual_msg == error_msg, (
            "Schema error message not as expected: "
            f"{actual_msg!r} != {error_msg!r}"
        )
| 234 | + |
| 235 | + |
def _check_schema_pass(req_in, req_out, **schema_options):
    """Assert that both MARS adaptor classes normalise ``req_in`` to ``req_out``.

    Any extra keyword arguments are handed to the adaptor as its
    ``schema_options``.
    """
    adaptor_classes = (mars.MarsCdsAdaptor, multi.MultiMarsCdsAdaptor)
    for adaptor_cls in adaptor_classes:
        context = Context(logger=logger)
        adaptor = adaptor_cls(
            form=None, context=context, schema_options=schema_options
        )
        normalised = adaptor.normalise_request(req_in)
        assert normalised == req_out
0 commit comments