Skip to content

Commit 85a65d2

Browse files
authored
Merge pull request #179 from ral-facilities/DSEGOG-412-Matching-Shot-numbers
Dsegog 412 matching shot numbers
2 parents 4755efa + 6adec64 commit 85a65d2

File tree

8 files changed

+316
-132
lines changed

8 files changed

+316
-132
lines changed

operationsgateway_api/src/records/ingestion/partial_import_checks.py

Lines changed: 21 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -40,56 +40,42 @@ def metadata_checks(self):
4040
ingested_metadata = self.ingested_record.metadata
4141
stored_metadata = self.stored_record.metadata
4242

43+
# Match timestamp with or without timezone info
4344
try:
44-
time_match = (ingested_metadata.timestamp).replace(
45+
time_match = ingested_metadata.timestamp.replace(
4546
tzinfo=None,
4647
) == stored_metadata.timestamp.replace(tzinfo=None)
4748
except Exception:
48-
time_match = (ingested_metadata.timestamp) == stored_metadata.timestamp
49+
time_match = ingested_metadata.timestamp == stored_metadata.timestamp
4950

50-
epac_match = (
51-
ingested_metadata.epac_ops_data_version
52-
== stored_metadata.epac_ops_data_version
53-
)
54-
shot_match = ingested_metadata.shotnum == stored_metadata.shotnum
5551
area_match = ingested_metadata.active_area == stored_metadata.active_area
5652
experiment_match = (
5753
ingested_metadata.active_experiment == stored_metadata.active_experiment
5854
)
5955

60-
if time_match and epac_match and shot_match and area_match and experiment_match:
56+
shot_match = ingested_metadata.shotnum == stored_metadata.shotnum
57+
58+
# Reject if shotnum matches but timestamp doesn't
59+
if shot_match and not time_match:
60+
raise RejectRecordError("a record with this shotnum already exists")
61+
62+
# Accept merge if everything matches
63+
if shot_match and time_match and area_match and experiment_match:
6164
log.info("record metadata matches existing record perfectly")
6265
return "accept_merge"
6366

64-
elif (
65-
time_match
66-
and not epac_match
67-
and not shot_match
68-
and not area_match
69-
and not experiment_match
70-
):
71-
raise RejectRecordError("timestamp matches, other metadata does not")
72-
73-
elif (
74-
shot_match
75-
and not time_match
76-
and not epac_match
77-
and not area_match
78-
and not experiment_match
79-
):
80-
raise RejectRecordError("shotnum matches, other metadata does not")
81-
82-
elif not time_match and not shot_match:
67+
# Accept as new record if both timestamp and shotnum are unique
68+
if not time_match and not shot_match:
8369
return "accept_new"
8470

85-
else:
86-
log.error(
87-
"Metadata for file being ingested: %s, metadata for file stored in"
88-
" database: %s",
89-
ingested_metadata,
90-
stored_metadata,
91-
)
92-
raise RejectRecordError("inconsistent metadata")
71+
# All other cases are inconsistent
72+
log.error(
73+
"Metadata for file being ingested: %s, metadata for file "
74+
"stored in database: %s",
75+
ingested_metadata,
76+
stored_metadata,
77+
)
78+
raise RejectRecordError("inconsistent metadata")
9379

9480
def channel_checks(
9581
self,

operationsgateway_api/src/records/record.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ async def update(self) -> None:
9797

9898
for metadata_key, value in self.record.metadata.model_dump(
9999
exclude_unset=True,
100+
exclude={"epac_ops_data_version"},
100101
).items():
101102
await MongoDBInterface.update_one(
102103
"records",
@@ -149,6 +150,30 @@ async def find_existing_record(self) -> Union[RecordModel, None]:
149150
else:
150151
return None
151152

153+
async def find_record_by_shotnum(self) -> Union[RecordModel, None]:
154+
"""
155+
Searches the MongoDB 'records' collection for a record with the given
156+
shot number. Returns a `RecordModel` if found, otherwise returns `None`.
157+
"""
158+
159+
log.debug(
160+
"Querying MongoDB to see if a shotnum is already stored in the database",
161+
)
162+
163+
record_dict = await MongoDBInterface.find_one(
164+
"records",
165+
filter_={"metadata.shotnum": self.record.metadata.shotnum},
166+
)
167+
168+
if record_dict:
169+
return RecordModel(
170+
_id=record_dict["_id"],
171+
metadata=record_dict["metadata"],
172+
channels=record_dict["channels"],
173+
)
174+
else:
175+
return None
176+
152177
@staticmethod
153178
async def find_record(
154179
conditions: Dict[str, Any],

operationsgateway_api/src/routes/ingest_data.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,10 @@ async def submit_hdf(
109109
) = await hdf_handler.extract_data()
110110

111111
record_original = Record(record_data)
112+
# a record is deemed existing in the db if the timestamp or shotnum exists
112113
stored_record = await record_original.find_existing_record()
114+
if stored_record is None and record_original.record.metadata.shotnum is not None:
115+
stored_record = await record_original.find_record_by_shotnum()
113116

114117
file_checker = FileChecks(record_data)
115118
warning = file_checker.epac_data_version_checks()

test/endpoints/test_submit_hdf.py

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -271,37 +271,6 @@ async def test_hdf_rejects(
271271
assert test_response.json() == expected_response
272272
assert test_response.status_code == 400
273273

274-
@pytest.mark.asyncio
275-
async def test_partial_import_record_reject(
276-
self,
277-
reset_record_storage,
278-
test_app: TestClient,
279-
login_and_get_token,
280-
):
281-
_ = await create_test_hdf_file()
282-
283-
test_file = "test.h5"
284-
files = {"file": (test_file, open(test_file, "rb"))}
285-
286-
test_app.post(
287-
"/submit/hdf",
288-
headers={"Authorization": f"Bearer {login_and_get_token}"},
289-
files=files,
290-
)
291-
292-
_ = await create_test_hdf_file(shotnum=["valid", "missing"])
293-
294-
files = {"file": (test_file, open(test_file, "rb"))}
295-
296-
test_response = test_app.post(
297-
"/submit/hdf",
298-
headers={"Authorization": f"Bearer {login_and_get_token}"},
299-
files=files,
300-
)
301-
302-
assert test_response.json() == {"detail": "inconsistent metadata"}
303-
assert test_response.status_code == 400
304-
305274
@pytest.mark.asyncio
306275
async def test_channel_all_fail(
307276
self,

test/records/ingestion/create_test_hdf.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,10 @@ async def create_test_hdf_file( # noqa: C901
127127
if shotnum[1] == "exists":
128128
if shotnum[0] == "invalid":
129129
record.attrs.create("shotnum", "string")
130-
else:
130+
elif shotnum[0] == "valid":
131131
record.attrs.create("shotnum", 366272, dtype="u8")
132+
elif shotnum[0]:
133+
record.attrs.create("shotnum", int(shotnum[0]), dtype="u8")
132134
if active_area[1] == "exists":
133135
record.attrs.create("active_area", active_area[0])
134136
if active_experiment[1] == "exists":

test/records/ingestion/test_channel.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ async def test_required_attribute_checks(
403403
with h5py.File("test.h5", "w") as f:
404404
f.attrs.create("epac_ops_data_version", "1.2")
405405
record = f["/"]
406-
record.attrs.create("timestamp", "2020-04-07 14:28:16")
406+
record.attrs.create("timestamp", "2020-04-07T14:28:16Z")
407407
record.attrs.create("shotnum", 366272, dtype="u8")
408408
record.attrs.create("active_area", "ea1")
409409
record.attrs.create("active_experiment", "90097341")
@@ -564,7 +564,7 @@ async def test_optional_attribute_vector(
564564
with h5py.File("test.h5", "w") as f:
565565
f.attrs.create("epac_ops_data_version", "1.2")
566566
record = f["/"]
567-
record.attrs.create("timestamp", "2020-04-07 14:28:16")
567+
record.attrs.create("timestamp", "2020-04-07T14:28:16Z")
568568
record.attrs.create("shotnum", 366272, dtype="u8")
569569
record.attrs.create("active_area", "ea1")
570570
record.attrs.create("active_experiment", "90097341")
@@ -724,7 +724,7 @@ async def test_dataset_checks_float_image(self, remove_hdf_file: None):
724724
with h5py.File("test.h5", "w") as f:
725725
f.attrs.create("epac_ops_data_version", "1.2")
726726
record = f["/"]
727-
record.attrs.create("timestamp", "2020-04-07 14:28:16")
727+
record.attrs.create("timestamp", "2020-04-07T14:28:16Z")
728728
record.attrs.create("shotnum", 366272, dtype="u8")
729729
record.attrs.create("active_area", "ea1")
730730
record.attrs.create("active_experiment", "90097341")

test/records/ingestion/test_hdf_handler.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import h5py
44
import numpy as np
5-
from pydantic import ValidationError
65
import pytest
76

87
from operationsgateway_api.src.exceptions import HDFDataExtractionError
@@ -154,7 +153,7 @@ async def test_extract_channel_unexpected_group(
154153
with h5py.File("test.h5", "w") as f:
155154
f.attrs.create("epac_ops_data_version", "1.2")
156155
record = f["/"]
157-
record.attrs.create("timestamp", "2020-04-07 14:28:16")
156+
record.attrs.create("timestamp", "2020-04-07T14:28:16Z")
158157
record.attrs.create("shotnum", 366272, dtype="u8")
159158
record.attrs.create("active_area", "ea1")
160159
record.attrs.create("active_experiment", "90097341")
@@ -212,7 +211,7 @@ async def test_extract_channel_data_missing(
212211
with h5py.File("test.h5", "w") as f:
213212
f.attrs.create("epac_ops_data_version", "1.2")
214213
record = f["/"]
215-
record.attrs.create("timestamp", "2020-04-07 14:28:16")
214+
record.attrs.create("timestamp", "2020-04-07T14:28:16Z")
216215
record.attrs.create("shotnum", 366272, dtype="u8")
217216
record.attrs.create("active_area", "ea1")
218217
record.attrs.create("active_experiment", "90097341")

0 commit comments

Comments (0)