Skip to content

Commit 563027c

Browse files
yeoldegroveNotTheEvilOne
authored and committed
add S3ObjectIndex Class
1 parent 71a1b1c commit 563027c

File tree

2 files changed

+142
-1
lines changed

2 files changed

+142
-1
lines changed

src/gardenlinux/s3/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@
66

77
from .bucket import Bucket
88
from .s3_artifacts import S3Artifacts
9+
from .s3_object_index import S3ObjectIndex
910

10-
__all__ = ["Bucket", "S3Artifacts"]
11+
__all__ = ["Bucket", "S3Artifacts", "S3ObjectIndex"]
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
# -*- coding: utf-8 -*-
2+
3+
"""
4+
S3 object index with flavors filtering
5+
"""
6+
7+
import base64
8+
import json
9+
import logging
10+
import os
11+
import subprocess
12+
import time
13+
import yaml
14+
from typing import Any, Optional
15+
16+
from ..flavors.parser import Parser
17+
from ..logger import LoggerSetup
18+
from .bucket import Bucket
19+
20+
21+
class S3ObjectIndex(object):
    """
    S3 object index class with flavors filtering capabilities

    Lists the object keys stored below a prefix of an S3 bucket, derives an
    index of the unique names found directly below "objects/" and can cache
    both results in local JSON files.

    :author:     Garden Linux Maintainers
    :copyright:  Copyright 2024 SAP SE
    :package:    gardenlinux
    :subpackage: s3
    :since:      0.9.0
    :license:    https://www.apache.org/licenses/LICENSE-2.0
                 Apache License, Version 2.0
    """

    def __init__(
        self,
        bucket_name: str,
        endpoint_url: Optional[str] = None,
        s3_resource_config: Optional[dict[str, Any]] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Constructor __init__(S3ObjectIndex)

        :param bucket_name: S3 bucket name
        :param endpoint_url: S3 endpoint URL
        :param s3_resource_config: Additional boto3 S3 config values
        :param logger: Logger instance; replaced by the "gardenlinux.s3"
                       logger when missing or without any handler

        :since: 0.9.0
        """

        if logger is None or not logger.hasHandlers():
            logger = LoggerSetup.get_logger("gardenlinux.s3")

        self._bucket = Bucket(bucket_name, endpoint_url, s3_resource_config)
        self._logger = logger

    def get_index(
        self,
        prefix: str,
        cache_file: Optional[str] = None,
        cache_ttl: int = 3600,
    ) -> dict[str, Any]:
        """
        Get and cache S3 objects with an indexed list of objects.

        Without "cache_file" the bucket is always queried. With "cache_file"
        the key list is stored in that file and the index in
        "<cache_file>.index.json"; both are reused while the key list file is
        younger than "cache_ttl" seconds. Unreadable or malformed cache files
        are ignored and replaced with fresh data.

        :param prefix: Prefix for S3 objects
        :param cache_file: Path to cache file (optional, enables caching when provided)
        :param cache_ttl: Cache time-to-live in seconds

        :returns: Dictionary containing 'index' and 'artifacts' keys

        :since: 0.9.0
        """

        self._logger.debug("Getting object index for prefix: %s", prefix)

        # Fetch directly if no caching
        if cache_file is None:
            artifacts = self._fetch_artifacts(prefix)
            self._logger.debug(
                "Fetched %d artifacts without caching", len(artifacts)
            )
            return {"index": self._build_index(artifacts), "artifacts": artifacts}

        index_file = cache_file + ".index.json"

        cached = self._load_cache(cache_file, index_file, cache_ttl)
        if cached is not None:
            self._logger.debug("Using cached object index")
            return cached

        # Fetch from S3 and cache
        artifacts = self._fetch_artifacts(prefix)
        index = self._build_index(artifacts)

        self._logger.info("Fetched %d artifacts from S3", len(artifacts))

        self._save_cache(cache_file, index_file, artifacts, index)

        return {"index": index, "artifacts": artifacts}

    def _fetch_artifacts(self, prefix: str) -> list[str]:
        """
        List the keys of all bucket objects below the given prefix.

        :param prefix: Prefix for S3 objects
        :returns: List of object keys
        """

        return [
            s3_object.key
            for s3_object in self._bucket.objects.filter(Prefix=prefix).all()
        ]

    def _load_cache(
        self, cache_file: str, index_file: str, cache_ttl: int
    ) -> Optional[dict[str, Any]]:
        """
        Read the cached key list and index if they are present and fresh.

        :param cache_file: Path of the JSON file holding the key list
        :param index_file: Path of the JSON file holding the index
        :param cache_ttl: Cache time-to-live in seconds

        :returns: Dictionary containing 'index' and 'artifacts' keys or None
                  when the cache is missing, expired or unusable
        """

        # Ask for the mtime directly instead of checking for existence first,
        # so a file vanishing in between is a cache miss rather than a crash.
        try:
            age = time.time() - os.path.getmtime(cache_file)
        except OSError:
            return None

        if age >= cache_ttl or not os.path.exists(index_file):
            return None

        try:
            with open(cache_file, "r", encoding="utf-8") as f:
                artifacts = json.load(f)
            with open(index_file, "r", encoding="utf-8") as f:
                index = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError as well as the
            # UnicodeDecodeError raised for files containing binary garbage.
            self._logger.warning("Cache files corrupted, fetching fresh data")
            return None

        # Valid JSON of the wrong shape must not be handed to callers.
        if not isinstance(artifacts, list) or not isinstance(index, dict):
            self._logger.warning("Cache files corrupted, fetching fresh data")
            return None

        return {"index": index, "artifacts": artifacts}

    def _save_cache(
        self,
        cache_file: str,
        index_file: str,
        artifacts: list[str],
        index: dict[str, list[str]],
    ) -> None:
        """
        Write the key list and index to the cache files (best effort).

        :param cache_file: Path of the JSON file receiving the key list
        :param index_file: Path of the JSON file receiving the index
        :param artifacts: List of object keys
        :param index: Index built from the object keys
        """

        try:
            with open(cache_file, "w", encoding="utf-8") as f:
                json.dump(artifacts, f)
            with open(index_file, "w", encoding="utf-8") as f:
                json.dump(index, f)
            self._logger.debug("Saved object index to cache")
        except OSError:
            # Caching is an optimisation only; callers still get their data.
            self._logger.warning("Failed to save cache files")

    def _build_index(self, objects: list[str]) -> dict[str, list[str]]:
        """
        Build an index of objects for faster searching.

        Only keys of the form "objects/<name>/..." (at least three
        "/"-separated parts) contribute their "<name>" part.

        :param objects: List of object keys
        :returns: Dictionary with the sorted unique names under key 'objects'
        :since: 0.9.0
        """

        cnames = set()

        for key in objects:
            parts = key.split("/")

            if len(parts) >= 3 and parts[0] == "objects":
                cnames.add(parts[1])

        self._logger.debug("Built index with %d unique objects", len(cnames))
        return {"objects": sorted(cnames)}

0 commit comments

Comments
 (0)