Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions checks/remotesettings/attachments_integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import asyncio
import logging
import math

import aiohttp

Expand Down Expand Up @@ -68,17 +67,44 @@ async def run(server: str, slice_percent: tuple[int, int] = (0, 100)) -> CheckRe

# For each record that has an attachment, check the attachment content.
attachments = []
for entry, changeset in zip(entries, results):
total_size = 0
for changeset in results:
records = changeset["changes"]
for record in records:
if "attachment" not in record:
continue
attachment = record["attachment"]
attachment["location"] = base_url + attachment["location"]
attachments.append(attachment)

lower_idx = math.floor(slice_percent[0] / 100.0 * len(attachments))
upper_idx = math.ceil(slice_percent[1] / 100.0 * len(attachments))
total_size += attachment["size"]

# Spread the load of attachments integrity check based on size.
# Otherwise some slices may take much longer to complete than others.

# We sort by attachment size descending. It's not mandatory, but
# it gives us more observability of what slices are taking longer
# to complete (big files or lots of small files).
attachments.sort(key=lambda att: att["size"], reverse=True)
# Compute the slice using percent of total size.
slice_lower = (slice_percent[0] / 100.0) * total_size
slice_upper = (slice_percent[1] / 100.0) * total_size
lower_idx = 0
accumulated_size = 0
for attachment in attachments:
if accumulated_size >= slice_lower:
break
accumulated_size += attachment["size"]
lower_idx += 1
upper_idx = lower_idx
for attachment in attachments[lower_idx:]:
accumulated_size += attachment["size"]
upper_idx += 1
if accumulated_size >= slice_upper:
break
print(
f"Total size {total_size} Slice {slice_percent} => size {slice_lower:.0f}-{slice_upper:.0f} bytes"
)
print(f"Attachments slice indexes: {lower_idx}-{upper_idx} of {len(attachments)}")
sliced = attachments[lower_idx:upper_idx]

futures = [test_attachment(attachment) for attachment in sliced]
Expand Down
68 changes: 58 additions & 10 deletions tests/checks/remotesettings/test_attachments_integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ async def test_negative(mock_aioresponses, no_sleep):
"location": "file1.jpg",
},
},
{"id": "efg", "attachment": {"location": "missing.jpg"}},
{"id": "efg", "attachment": {"location": "missing.jpg", "size": 7}},
{
"id": "ijk",
"attachment": {"size": 10, "hash": "foo", "location": "file2.jpg"},
Expand All @@ -113,20 +113,20 @@ async def test_negative(mock_aioresponses, no_sleep):
assert data == {
"bad": [
{
"error": "size differ (5!=7)",
"url": "http://cdn/file1.jpg",
},
{
"error": "Connection refused: GET http://cdn/missing.jpg",
"url": "http://cdn/missing.jpg",
"error": "timeout",
"url": "http://cdn/file3.jpg",
},
{
"error": "hash differ (bf2cb58a68f684d95a3b78ef8f661c9a4e5b09e82cc8f9cc88cce90528caeb27!=foo)",
"url": "http://cdn/file2.jpg",
},
{
"error": "timeout",
"url": "http://cdn/file3.jpg",
"error": "size differ (5!=7)",
"url": "http://cdn/file1.jpg",
},
{
"error": "Connection refused: GET http://cdn/missing.jpg",
"url": "http://cdn/missing.jpg",
},
],
"checked": 4,
Expand Down Expand Up @@ -169,7 +169,7 @@ async def test_urls_slicing(
records_url,
payload={
"changes": [
{"id": f"id{i}", "attachment": {"location": f"file{i}.jpg"}}
{"id": f"id{i}", "attachment": {"location": f"file{i}.jpg", "size": 10}}
for i in range(100)
]
},
Expand All @@ -183,3 +183,51 @@ async def test_urls_slicing(
calls = mocked.call_args_list
assert calls[0][0][0]["location"] == f"http://cdn/file{expected_lower}.jpg"
assert calls[-1][0][0]["location"] == f"http://cdn/file{expected_upper}.jpg"


async def test_urls_slicing_by_size(mock_aioresponses):
server_url = "http://fake.local/v1"
mock_aioresponses.get(
server_url + "/",
payload={"capabilities": {"attachments": {"base_url": "http://cdn/"}}},
)
changes_url = server_url + CHANGESET_URL.format("monitor", "changes")
mock_aioresponses.get(
changes_url,
payload={
"changes": [
{"id": "abc", "bucket": "bid", "collection": "cid", "last_modified": 42}
]
},
)
records_url = server_url + CHANGESET_URL.format("bid", "cid") + "?_expected=42"
mock_aioresponses.get(
records_url,
payload={
"changes": [
{"id": "id-0", "attachment": {"location": "file-big.jpg", "size": 100}},
{
"id": "id-1",
"attachment": {"location": "file-small1.jpg", "size": 10},
},
{
"id": "id-2",
"attachment": {"location": "file-small2.jpg", "size": 10},
},
{
"id": "id-3",
"attachment": {"location": "file-small3.jpg", "size": 10},
},
]
},
)

with mock.patch(
"checks.remotesettings.attachments_integrity.test_attachment"
) as mocked:
mocked.return_value = {}, True
await run(server_url, slice_percent=(0, 50))
calls = mocked.call_args_list
# Only the big file should be in the first 50% slice.
assert len(calls) == 1
assert calls[0][0][0]["location"] == "http://cdn/file-big.jpg"
Loading