Skip to content

Commit 7674eee

Browse files
authored
redefine currentness result value calculation (#274)
* redefine currentness indicator
1 parent 12ca6f6 commit 7674eee

File tree

6 files changed

+219
-134
lines changed

6 files changed

+219
-134
lines changed

CHANGELOG.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
# Changelog
22

3-
# Current Main
3+
4+
## Current Main
45

56
### Breaking Changes
67

78
- Rename layer `ideal_vgi_infrastructure` to `infrastructure_lines` ([#416])
89

10+
### New Features
11+
12+
- Improve Currentness indicator ([#274])
13+
14+
[#274]: https://github.com/GIScience/ohsome-quality-analyst/pull/274
915
[#416]: https://github.com/GIScience/ohsome-quality-analyst/pull/416
1016

1117

workers/ohsome_quality_analyst/indicators/currentness/indicator.py

Lines changed: 152 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,17 @@ class Currentness(BaseIndicator):
1616
Ratio of all contributions that have been edited since 2008 until the
1717
current day in relation with years without mapping activities in the same
1818
time range
19+
20+
Attributes:
21+
threshold_4 (int): Number of years it should have been since 50%
22+
of the items were last edited to be in the best class
23+
threshold_3 (int): Number of years it should have been since 50%
24+
of the items were last edited to be in the second best class
25+
threshold_2 (int): Number of years it should have been since 50%
26+
of the items were last edited to be in the third best class
27+
threshold_1 (int): Number of years it should have been since 50%
28+
of the items were last edited to be in the second worst class. If
29+
the result is higher than this threshold, it is assigned to the worst class
1930
"""
2031

2132
def __init__(
@@ -24,161 +35,205 @@ def __init__(
2435
feature: geojson.Feature,
2536
) -> None:
2637
super().__init__(layer=layer, feature=feature)
27-
self.threshold_yellow = 0.6
28-
self.threshold_red = 0.2
38+
self.threshold_4 = 1
39+
self.threshold_3 = 2
40+
self.threshold_2 = 4
41+
self.threshold_1 = 8
2942
self.element_count = None
30-
self.contribution_sum = 0
31-
self.contributions_rel = {}
32-
self.contributions_abs = {}
33-
self.ratio = {}
43+
self.contributions_sum = None
44+
self.contributions_rel = {} # yearly interval
45+
self.contributions_abs = {} # yearly interval
46+
self.start = "2008-01-01"
47+
self.end = None
48+
self.low_contributions_threshold = 0
3449

3550
async def preprocess(self) -> None:
51+
"""Get absolute number of contributions for each year since given start date"""
3652
latest_ohsome_stamp = await ohsome_client.get_latest_ohsome_timestamp()
37-
# time_range for all years since 2008 and curr_year_range for the ongoing year
38-
start = "2008-01-01"
3953
self.end = latest_ohsome_stamp.strftime("%Y-%m-%d")
40-
time_range = "{0}/{1}/{2}".format(start, self.end, "P1Y")
41-
curr_year_start = "{0}-01-01".format(latest_ohsome_stamp.year)
42-
curr_year_range = "{0}/{1}".format(curr_year_start, self.end)
43-
54+
past_years_interval = "{0}/{1}/{2}".format(self.start, self.end, "P1Y")
55+
current_year_interval = "{0}/{1}".format(
56+
"{0}-01-01".format(latest_ohsome_stamp.year),
57+
self.end,
58+
)
59+
# Fetch number of features
4460
response = await ohsome_client.query(self.layer, self.feature)
4561
self.element_count = response["result"][0]["value"]
4662
self.result.timestamp_osm = dateutil.parser.isoparse(
4763
response["result"][0]["timestamp"]
4864
)
49-
response_contributions = await ohsome_client.query(
65+
# Fetch all contributions of past years
66+
contributions_yearly = await ohsome_client.query(
5067
self.layer,
5168
self.feature,
52-
time=time_range,
69+
time=past_years_interval,
5370
count_latest_contributions=True,
5471
)
55-
for year in response_contributions["result"]:
56-
time = dateutil.parser.isoparse(year["fromTimestamp"])
57-
count = year["value"]
58-
self.contributions_abs[time.strftime("%Y")] = count
59-
60-
curr_year_response_contributions = await ohsome_client.query(
72+
# Fetch contributions of current year
73+
contributions_current_year = await ohsome_client.query(
6174
self.layer,
6275
self.feature,
63-
time=curr_year_range,
76+
time=current_year_interval,
6477
count_latest_contributions=True,
6578
)
66-
time = dateutil.parser.isoparse(
67-
curr_year_response_contributions["result"][0]["fromTimestamp"]
79+
# Merge contributions
80+
contributions = (
81+
contributions_yearly["result"] + contributions_current_year["result"]
6882
)
69-
count = curr_year_response_contributions["result"][0]["value"]
70-
self.contributions_abs[time.strftime("%Y")] = count
83+
for contrib in contributions:
84+
time = dateutil.parser.isoparse(contrib["fromTimestamp"])
85+
count = contrib["value"]
86+
self.contributions_abs[time.strftime("%Y")] = count
7187

7288
def calculate(self) -> None:
89+
"""Calculate the years since over 50% of the elements were last edited"""
7390
logging.info(f"Calculation for indicator: {self.metadata.name}")
7491

75-
self.contribution_sum = sum(self.contributions_abs.values())
76-
# It can be that features are counted, but have been deleted since.
92+
# It can be that features have been edited, but have been deleted since.
7793
if self.element_count == 0:
7894
self.result.description = (
7995
"In the area of interest no features "
8096
"matching the filter are present today."
8197
)
8298
return
83-
contributions_share = self.contribution_sum
84-
last_edited_year = ""
85-
# determine the percentage of elements that were last edited in that year
86-
for year in self.contributions_abs:
87-
self.ratio[year] = (contributions_share / self.contribution_sum) * 100
88-
contributions_share -= self.contributions_abs[year]
89-
self.contributions_rel[year] = (
90-
self.contributions_abs[year] / self.contribution_sum
91-
) * 100
92-
if self.contributions_rel[year] != 0:
93-
last_edited_year = year
94-
years_since_last_edit = int(self.result.timestamp_oqt.year) - int(
95-
last_edited_year
96-
)
97-
percentage_contributions = 0
98-
median_year = ""
99-
for year in self.contributions_rel:
100-
percentage_contributions += self.contributions_rel[year]
101-
if percentage_contributions < 50:
102-
continue
99+
100+
# calculate relative number of contributions for each year
101+
self.contributions_sum = sum(self.contributions_abs.values())
102+
contributions_rel = {}
103+
contrib_rel_cum_green = 0
104+
contrib_rel_cum_yellow = 0
105+
contrib_rel_cum_red = 0
106+
for num_of_years, (year, contrib_abs) in enumerate(
107+
reversed(self.contributions_abs.items()),
108+
start=1,
109+
):
110+
contrib_rel = contrib_abs / self.contributions_sum
111+
contributions_rel[year] = contrib_rel
112+
if num_of_years < self.threshold_2:
113+
contrib_rel_cum_green += contrib_rel
114+
elif num_of_years < self.threshold_1:
115+
contrib_rel_cum_yellow += contrib_rel
103116
else:
104-
median_year = year
105-
break
106-
median_diff = int(self.result.timestamp_oqt.year) - int(median_year)
107-
if median_diff <= 1:
108-
param_1 = 1
109-
elif median_diff <= 4:
110-
param_1 = 0.6
111-
else:
112-
param_1 = 0.2
113-
if years_since_last_edit <= 1:
114-
param_2 = 1
115-
elif years_since_last_edit <= 4:
116-
param_2 = 0.6
117-
else:
118-
param_2 = 0.2
119-
self.result.value = (param_1 + param_2) / 2
120-
if median_diff == 0:
121-
median_diff = "this year"
122-
elif median_diff == 1:
123-
median_diff = "in the last year"
124-
else:
125-
median_diff = "in the last {0} years".format(median_diff)
117+
contrib_rel_cum_red += contrib_rel
118+
self.contributions_rel = dict(sorted(contributions_rel.items()))
119+
# calculate the year in which 50% of the total edits have been made
120+
self.median_year = get_median_year(self.contributions_rel)
121+
# years elapsed since the median year (the year by which 50% of the elements were last edited)
122+
self.result.value = int(self.result.timestamp_oqt.year) - self.median_year
123+
126124
self.result.description = Template(self.metadata.result_description).substitute(
127-
years=median_diff,
125+
years=self.result.value,
128126
layer_name=self.layer.name,
129127
end=self.end,
130-
elements=self.contribution_sum,
128+
elements=int(self.contributions_sum),
129+
green=round(contrib_rel_cum_green * 100, 2),
130+
yellow=round(contrib_rel_cum_yellow * 100, 2),
131+
red=round(contrib_rel_cum_red * 100, 2),
132+
median_years=self.result.value,
133+
threshold_green=self.threshold_2 - 1,
134+
threshold_yellow_start=self.threshold_2,
135+
threshold_yellow_end=self.threshold_1 - 1,
136+
threshold_red=self.threshold_1,
131137
)
132-
133-
if self.result.value >= self.threshold_yellow:
134-
self.result.class_ = 5
138+
if self.result.value > self.threshold_1:
139+
self.result.class_ = 1
135140
self.result.description = (
136-
self.result.description + self.metadata.label_description["green"]
141+
self.result.description + self.metadata.label_description["red"]
137142
)
138-
elif self.result.value >= self.threshold_red:
139-
self.result.class_ = 3
143+
elif self.threshold_1 >= self.result.value > self.threshold_3:
144+
self.result.class_ = 2
140145
self.result.description = (
141146
self.result.description + self.metadata.label_description["yellow"]
142147
)
143-
elif self.result.value < self.threshold_red:
144-
self.result.class_ = 1
148+
elif self.threshold_3 >= self.result.value:
149+
self.result.class_ = 5
145150
self.result.description = (
146-
self.result.description + self.metadata.label_description["red"]
151+
self.result.description + self.metadata.label_description["green"]
147152
)
148153
else:
149154
raise ValueError("Ratio has an unexpected value.")
155+
last_edited_year = get_last_edited_year(self.contributions_abs)
156+
self.years_since_last_edit = (
157+
int(self.result.timestamp_oqt.year) - last_edited_year
158+
)
159+
if last_edited_year != self.result.timestamp_oqt.year:
160+
self.result.description += (
161+
"Attention: There was no mapping activity after "
162+
+ "{0} in this region.".format(last_edited_year)
163+
)
164+
if self.contributions_sum < self.low_contributions_threshold:
165+
self.result.description += (
166+
"Attention: In this region there are very few contributions "
167+
+ "({0}) with the given tags ".format(self.contributions_sum)
168+
)
150169

151170
def create_figure(self) -> None:
152171
"""Create a plot.
153172
154173
Shows the percentage of contributions for each year.
155174
"""
156-
if self.element_count == 0:
175+
if self.result.label == "undefined":
176+
logging.info("Result is undefined. Skipping figure creation.")
157177
return
158178
px = 1 / plt.rcParams["figure.dpi"] # Pixel in inches
159179
figsize = (400 * px, 400 * px)
160-
fig = plt.figure(figsize=figsize)
180+
fig = plt.figure(figsize=figsize, tight_layout=True)
161181
ax = fig.add_subplot()
162-
x = list(self.ratio.keys())
163-
ax.plot(
164-
x,
165-
self.ratio.values(),
166-
color="b",
167-
label="Percentage of contributions (cumulative)",
182+
patches = ax.bar(
183+
self.contributions_rel.keys(),
184+
height=[v * 100 for v in self.contributions_rel.values()],
185+
edgecolor="black",
168186
)
169-
ax.bar(
170-
list(self.contributions_rel.keys()),
171-
self.contributions_rel.values(),
172-
color=self.result.label,
173-
label="Percentage of contributions (year) ",
187+
year_range = len(self.contributions_rel)
188+
last_edited_year = get_last_edited_year(self.contributions_abs)
189+
years_since_last_edit = int(self.result.timestamp_oqt.year) - last_edited_year
190+
for patch in patches:
191+
if year_range <= years_since_last_edit:
192+
ax.text(
193+
patch.get_x(),
194+
max(self.contributions_rel.values()) * 100 / 2,
195+
"!",
196+
fontdict={"fontsize": 26},
197+
)
198+
if year_range >= self.threshold_1:
199+
patch.set_facecolor("red")
200+
year_range -= 1
201+
elif year_range >= self.threshold_2:
202+
patch.set_facecolor("yellow")
203+
year_range -= 1
204+
else:
205+
patch.set_facecolor("green")
206+
year_range -= 1
207+
plt.axvline(
208+
x=str(self.median_year),
209+
linestyle=":",
210+
color="black",
211+
label="Median Year: {0}".format(self.median_year),
174212
)
175-
ax.set_xticks(x[::2])
213+
plt.xticks(list(self.contributions_rel.keys())[::2])
214+
plt.xlabel("Year")
176215
plt.ylabel("Percentage of contributions")
177-
plt.title("Total Contributions: %i" % self.contribution_sum)
216+
plt.title("Total Contributions: %i" % self.contributions_sum)
178217
ax.legend(loc="lower center", bbox_to_anchor=(0.5, -0.45))
179218
fig.subplots_adjust(bottom=0.3)
180219
fig.tight_layout()
181220
img_data = StringIO()
182-
plt.savefig(img_data, format="svg")
183-
self.result.svg = img_data.getvalue() # this is svg data
221+
plt.savefig(img_data, format="svg", bbox_inches="tight")
222+
self.result.svg = img_data.getvalue()
184223
plt.close("all")
224+
225+
226+
def get_last_edited_year(contributions: dict) -> "int | None":
    """Get the most recent year in which at least one edit has been made.

    Args:
        contributions: Mapping of year (e.g. ``"2021"``) to the number (or
            share) of contributions recorded for that year.

    Returns:
        The latest year with a non-zero contribution value as ``int``, or
        ``None`` if the mapping is empty or every value is zero.
    """
    # Walk the years from newest to oldest; the first non-zero entry wins.
    for year in sorted(contributions, reverse=True):
        if contributions[year] != 0:
            return int(year)
    # Explicit instead of falling off the end: callers that do arithmetic on
    # the result have to handle the "no edits at all" case themselves.
    return None
231+
232+
233+
def get_median_year(contributions: dict) -> "int | None":
    """Get the year in which the cumulative share of edits reaches 50%.

    The shares are accumulated in ascending year order, starting with the
    earliest year present in the mapping.

    Args:
        contributions: Mapping of year (e.g. ``"2021"``) to the relative
            share of contributions in that year, expressed as a fraction
            (the shares of all years are expected to add up to 1).

    Returns:
        The first year, as ``int``, at which the running total of shares is
        at least 0.5, or ``None`` if that total is never reached (e.g. for an
        empty mapping).
    """
    contrib_rel_cum = 0
    for year in sorted(contributions):
        contrib_rel_cum += contributions[year]
        if contrib_rel_cum >= 0.5:
            return int(year)
    # Explicit instead of falling off the end: the 50% mark was never reached.
    return None

workers/ohsome_quality_analyst/indicators/currentness/metadata.yaml

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,23 @@ Currentness:
33
name: Currentness
44
description: |
55
Ratio of all contributions that have been edited since 2008 until the current day in relation with years without mapping activities in the same
6-
time range.
6+
time range.
77
Refers to data quality in respect to currentness.
88
label_description:
99
red: |
10-
More than half of the $elements edited elements were last edited over 4 years ago.
11-
Be aware that there it is very likely that many map features are outdated.
12-
You should carefully check this before using the data as it indicates bad
13-
data quality in respect to currentness.
10+
It is likely that many features are outdated.
1411
yellow: |
15-
More than half of the edited elements were last edited between 1 and 4 years ago.
16-
This refers to medium data quality in respect to currentness.
12+
It is likely that some features are up-to-date and some features are outdated.
1713
green: |
18-
This is a rather high value and indicates that the map features
19-
are very unlike to be outdated. This refers to good data quality in
20-
respect to currentness.
14+
It is likely that most features are up-to-date.
2115
undefined: |
2216
The quality level could not be calculated for this indicator.
2317
This is most likely due to the fact that no features have been mapped
2418
for this area of interest. Refer to other indicators that rely on
2519
an extrinsic comparison to identify if this means that data quality is
2620
bad or if there is just nothing to map here.
2721
result_description: |
28-
Over 50% of the $elements features ($layer_name) were edited $years.
22+
In the last $threshold_green years $green % of the elements were last edited.
23+
In the period from $threshold_yellow_start to $threshold_yellow_end years ago $yellow % of the elements were last edited.
24+
The remaining $red % were last edited more than $threshold_red years ago.
25+
The median currentness of the $elements features ($layer_name) is $median_years year(s).

workers/tests/integrationtests/api_response_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def get_indicator_properties_template():
2222
"result": {
2323
"timestamp_oqt": str,
2424
"timestamp_osm": Or(str),
25-
"value": Or(float, None),
25+
"value": Or(float, str, int, None),
2626
"label": str,
2727
"description": str,
2828
Opt("svg"): str,

0 commit comments

Comments
 (0)