Skip to content

Commit 33192c5

Browse files
committed
Restructuring directories.
1 parent cc0ae88 commit 33192c5

File tree

84 files changed

+1396
-462
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+1396
-462
lines changed

extraction_methods/core/extraction_method.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ def __init__(self, **kwargs):
2929
# Override with specific processor settings
3030
self._set_attrs(kwargs)
3131

32+
if not hasattr(self, "exists_key"):
33+
self.exists_key = "$"
34+
3235
def _set_attrs(self, conf: dict) -> None:
3336
"""
3437
Set instance attributes
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# encoding: utf-8
22
"""
33
"""
4-
__author__ = "Rhys Evans"
4+
__author__ = "Richard Smith"
55
__date__ = "27 May 2021"
66
__copyright__ = "Copyright 2018 United Kingdom Research and Innovation"
77
__license__ = "BSD - see LICENSE file in top-level package directory"
8-
__contact__ = "rhys.r.evans@stfc.ac.uk"
8+
__contact__ = "richard.d.smith@stfc.ac.uk"
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# encoding: utf-8
2+
"""
3+
.. _elasticsearch-extract:
4+
5+
Elasticsearch Extract
6+
---------------------
7+
"""
8+
__author__ = "Rhys Evans"
9+
__date__ = "24 May 2022"
10+
__copyright__ = "Copyright 2018 United Kingdom Research and Innovation"
11+
__license__ = "BSD - see LICENSE file in top-level package directory"
12+
__contact__ = "[email protected]"
13+
14+
import logging
15+
16+
# Third party imports
17+
from elasticsearch import Elasticsearch
18+
19+
from extraction_methods.core.extraction_method import ExtractionMethod
20+
21+
LOGGER = logging.getLogger(__name__)
22+
23+
24+
class AssetAggregatorExtract(ExtractionMethod):
    """
    Description:
        Summarise the assets found under ``body["assets"]`` and write the
        results as top-level entries of ``body``, so that higher level
        entities carry aggregate information about their assets.

    Configuration Options:
        Every option is a list of *terms*. A term is a mapping with:

        - ``name``: key under which the aggregate is written to ``body``.
        - ``key``: (optional) key read from each asset. Defaults to ``name``.

        Available options (all default to an empty list):

        - ``list_terms``: collect the values of every asset that has the key.
        - ``sum_terms``: sum the values of every asset that has the key.
        - ``avg_terms``: sum the values of every asset that has the key and
          divide by the total number of assets.
        - ``min_terms``: smallest value among the assets that have the key.
        - ``max_terms``: largest value among the assets that have the key.

    Configuration Example:

        .. code-block:: yaml

            inputs:
              list_terms:
                - name: formats
                  key: format
              sum_terms:
                - name: size
              avg_terms:
                - name: mean_size
                  key: size
              min_terms:
                - name: start_time
              max_terms:
                - name: end_time
    """

    # Names of the configurable term lists, in the order they are applied.
    _TERM_ATTRIBUTES = (
        "list_terms",
        "sum_terms",
        "avg_terms",
        "min_terms",
        "max_terms",
    )

    def __init__(self, **kwargs):
        """
        Store the configuration and default every missing term list to ``[]``.

        :param kwargs: configuration forwarded to ``ExtractionMethod``.
        """
        super().__init__(**kwargs)

        for attribute in self._TERM_ATTRIBUTES:
            # A term list that is absent (or explicitly null) means
            # "nothing to aggregate" for that operation.
            if getattr(self, attribute, None) is None:
                setattr(self, attribute, [])

    @staticmethod
    def _name_and_key(terms):
        """Yield ``(name, key)`` for each term; ``key`` defaults to ``name``."""
        for term in terms:
            # Terms are mappings, so membership (not ``hasattr``) decides
            # whether an explicit ``key`` was configured.
            yield term["name"], term.get("key", term["name"])

    @staticmethod
    def _values(assets, key):
        """Return the values stored under ``key`` in the assets that have it."""
        return [asset[key] for asset in assets if key in asset]

    def run(self, body: dict, **kwargs) -> dict:
        """
        Aggregate the configured terms over ``body["assets"]``.

        :param body: mapping holding an ``assets`` mapping whose values are
            the individual asset mappings. It is updated in place.
        :param kwargs: unused; accepted for interface compatibility.
        :return: the same ``body`` with the aggregate entries added.
        :raises KeyError: if ``body`` has no ``assets`` entry or a term has
            no ``name``.
        """
        # Dict views cannot be indexed, so materialise the assets once.
        assets = list(body["assets"].values())

        for name, key in self._name_and_key(self.list_terms):
            body[name] = self._values(assets, key)

        for name, key in self._name_and_key(self.sum_terms):
            body[name] = sum(self._values(assets, key))

        for name, key in self._name_and_key(self.avg_terms):
            # Computed independently of ``sum_terms`` so the instance
            # configuration is never modified by a call to ``run``.
            total = sum(self._values(assets, key))
            body[name] = total / len(assets) if assets else 0

        for name, key in self._name_and_key(self.min_terms):
            values = self._values(assets, key)
            # Without any value there is no meaningful minimum to report.
            if values:
                body[name] = min(values)

        for name, key in self._name_and_key(self.max_terms):
            values = self._values(assets, key)
            # Without any value there is no meaningful maximum to report.
            if values:
                body[name] = max(values)

        return body

extraction_methods/plugins/extraction_methods/basename.py renamed to extraction_methods/plugins/basename.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# encoding: utf-8
2-
__author__ = "Rhys Evans"
2+
__author__ = "Richard Smith"
33
__date__ = "11 Jun 2021"
44
__copyright__ = "Copyright 2018 United Kingdom Research and Innovation"
55
__license__ = "BSD - see LICENSE file in top-level package directory"
6-
__contact__ = "rhys.r.evans@stfc.ac.uk"
6+
__contact__ = "richard.d.smith@stfc.ac.uk"
77

88
import logging
99
import os

extraction_methods/plugins/extraction_methods/bbox.py renamed to extraction_methods/plugins/bbox.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
__author__ = "Rhys Evans"
1+
__author__ = "Richard Smith"
22
__date__ = "28 May 2021"
33
__copyright__ = "Copyright 2018 United Kingdom Research and Innovation"
44
__license__ = "BSD - see LICENSE file in top-level package directory"
5-
__contact__ = "rhys.r.evans@stfc.ac.uk"
5+
__contact__ = "richard.d.smith@stfc.ac.uk"
66

77

88
import logging
@@ -31,7 +31,7 @@ class BboxExtract(ExtractionMethod):
3131
3232
- method: bbox
3333
inputs:
34-
coordinate_keys:
34+
coordinate_keys:
3535
- west
3636
- south
3737
- east
@@ -41,14 +41,19 @@ class BboxExtract(ExtractionMethod):
4141

4242
def run(self, body: dict, **kwargs):
4343
try:
44+
west = body[self.coordinate_keys[0]]
45+
south = body[self.coordinate_keys[1]]
46+
east = body[self.coordinate_keys[2]]
47+
north = body[self.coordinate_keys[3]]
48+
4449
coordinates = [
4550
[
46-
float(body[self.coordinate_keys[0]]),
47-
float(body[self.coordinate_keys[1]]),
51+
float(west) if west is not None else west,
52+
float(south) if south is not None else south,
4853
],
4954
[
50-
float(body[self.coordinate_keys[2]]),
51-
float(body[self.coordinate_keys[3]]),
55+
float(east) if east is not None else east,
56+
float(north) if north is not None else north,
5257
],
5358
]
5459

@@ -57,6 +62,9 @@ def run(self, body: dict, **kwargs):
5762
"coordinates": coordinates,
5863
}
5964

65+
except TypeError:
66+
LOGGER.warning("Unable to convert bbox.", exc_info=True)
67+
6068
except KeyError:
6169
LOGGER.warning("Unable to convert bbox.", exc_info=True)
6270

extraction_methods/plugins/extraction_methods/boto_stats.py renamed to extraction_methods/plugins/boto_stats.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
Regex
66
------
77
"""
8-
__author__ = "Rhys Evans"
8+
__author__ = "Richard Smith"
99
__date__ = "27 May 2021"
1010
__copyright__ = "Copyright 2018 United Kingdom Research and Innovation"
1111
__license__ = "BSD - see LICENSE file in top-level package directory"
12-
__contact__ = "rhys.r.evans@stfc.ac.uk"
12+
__contact__ = "richard.d.smith@stfc.ac.uk"
1313

1414

1515
import logging
@@ -24,9 +24,9 @@
2424
from boto3.session import Session as BotoSession
2525
from botocore import UNSIGNED
2626
from botocore.config import Config
27-
from stac_generator.core.utils import Stats
2827

2928
from extraction_methods.core.extraction_method import ExtractionMethod
29+
from extraction_methods.core.utils import Stats
3030

3131
LOGGER = logging.getLogger(__name__)
3232

extraction_methods/plugins/extraction_methods/categories.py renamed to extraction_methods/plugins/categories.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
Regex
66
------
77
"""
8-
__author__ = "Rhys Evans"
8+
__author__ = "Richard Smith"
99
__date__ = "27 May 2021"
1010
__copyright__ = "Copyright 2018 United Kingdom Research and Innovation"
1111
__license__ = "BSD - see LICENSE file in top-level package directory"
12-
__contact__ = "rhys.r.evans@stfc.ac.uk"
12+
__contact__ = "richard.d.smith@stfc.ac.uk"
1313

1414

1515
# Python imports

extraction_methods/plugins/extraction_methods/ceda_observation.py renamed to extraction_methods/plugins/ceda_observation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# encoding: utf-8
2-
__author__ = "Rhys Evans"
2+
__author__ = "Richard Smith"
33
__date__ = "11 Jun 2021"
44
__copyright__ = "Copyright 2018 United Kingdom Research and Innovation"
55
__license__ = "BSD - see LICENSE file in top-level package directory"
6-
__contact__ = "rhys.r.evans@stfc.ac.uk"
6+
__contact__ = "richard.d.smith@stfc.ac.uk"
77

88
import logging
99
from string import Template

extraction_methods/plugins/extraction_methods/ceda_vocabulary.py renamed to extraction_methods/plugins/ceda_vocabulary.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
Regex
66
------
77
"""
8-
__author__ = "Rhys Evans"
8+
__author__ = "Richard Smith"
99
__date__ = "27 May 2021"
1010
__copyright__ = "Copyright 2018 United Kingdom Research and Innovation"
1111
__license__ = "BSD - see LICENSE file in top-level package directory"
12-
__contact__ = "rhys.r.evans@stfc.ac.uk"
12+
__contact__ = "richard.d.smith@stfc.ac.uk"
1313

1414

1515
# Python imports

0 commit comments

Comments
 (0)