Skip to content

Commit 6e2c75e

Browse files
authored
feat: duplicate_geography_id, duplicate_geo_json_key and forbidden_geography_id notices (#1953)
1 parent e84a9aa commit 6e2c75e

File tree

10 files changed

+425
-24
lines changed

10 files changed

+425
-24
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Copyright 2025 MobilityData LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.mobilitydata.gtfsvalidator.notice;
17+
18+
import static org.mobilitydata.gtfsvalidator.notice.SeverityLevel.ERROR;
19+
20+
import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice;
21+
22+
/**
23+
* A key in `locations.geojson` is duplicated.
24+
*
25+
* <p>The key must be unique for each feature in the GeoJSON file.
26+
*/
27+
@GtfsValidationNotice(severity = ERROR)
28+
public class DuplicateGeoJsonKeyNotice extends ValidationNotice {
29+
30+
/** The duplicated key. */
31+
private final String featureId;
32+
33+
/** The index of the first feature with the same key. */
34+
private final int firstIndex;
35+
36+
/** The index of the other feature with the same key. */
37+
private final int secondIndex;
38+
39+
public DuplicateGeoJsonKeyNotice(String featureId, int firstIndex, int secondIndex) {
40+
this.featureId = featureId;
41+
this.firstIndex = firstIndex;
42+
this.secondIndex = secondIndex;
43+
}
44+
}

main/src/main/java/org/mobilitydata/gtfsvalidator/table/GeoJsonFileLoader.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ public GtfsGeoJsonFeature extractFeature(
177177
gtfsGeoJsonFeature = new GtfsGeoJsonFeature();
178178
gtfsGeoJsonFeature.setFeatureId(
179179
featureObject.get(GtfsGeoJsonFeature.FEATURE_ID_FIELD_NAME).getAsString());
180+
gtfsGeoJsonFeature.setFeatureIndex(featureIndex);
180181

181182
String type = geometry.get(GtfsGeoJsonFeature.GEOMETRY_TYPE_FIELD_NAME).getAsString();
182183

main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsGeoJsonFeature.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ public final class GtfsGeoJsonFeature implements GtfsEntity {
2828
private Geometry geometryDefinition; // The geometry of the feature.
2929
private String stopName; // The name of the location as displayed to the riders.
3030
private String stopDesc; // A description of the location.
31+
private int featureIndex;
3132

3233
public GtfsGeoJsonFeature() {}
3334

@@ -45,6 +46,10 @@ public int csvRowNumber() {
4546
return 0;
4647
}
4748

49+
/**
 * Returns the value currently held in the {@code featureIndex} field.
 *
 * <p>NOTE(review): presumably the position of this feature within `locations.geojson`, as
 * assigned by the loader through {@code setFeatureIndex} - confirm against the loader.
 */
public int featureIndex() {
50+
return featureIndex;
51+
}
52+
4853
@Nonnull
4954
public String featureId() {
5055
return featureId;
@@ -113,6 +118,10 @@ public void setStopDesc(@Nullable String stopDesc) {
113118
this.stopDesc = stopDesc;
114119
}
115120

121+
/**
 * Stores the given index in the {@code featureIndex} field, replacing any previous value.
 *
 * @param featureIndex the index to record for this feature
 */
public void setFeatureIndex(int featureIndex) {
122+
this.featureIndex = featureIndex;
123+
}
124+
116125
/** Builder class for GtfsGeoJsonFeature. */
117126
public static class Builder {
118127
private String featureId;

main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsGeoJsonFeaturesContainer.java

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.util.List;
2121
import java.util.Map;
2222
import java.util.Optional;
23+
import org.mobilitydata.gtfsvalidator.notice.DuplicateGeoJsonKeyNotice;
2324
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;
2425

2526
/**
@@ -76,14 +77,11 @@ private void setupIndices(NoticeContainer noticeContainer) {
7677
GtfsGeoJsonFeature oldEntity = byLocationIdMap.getOrDefault(newEntity.featureId(), null);
7778
if (oldEntity == null) {
7879
byLocationIdMap.put(newEntity.featureId(), newEntity);
80+
} else {
81+
noticeContainer.addValidationNotice(
82+
new DuplicateGeoJsonKeyNotice(
83+
oldEntity.featureId(), oldEntity.featureIndex(), newEntity.featureIndex()));
7984
}
80-
// TODO: Removed that code until the notice is supported.
81-
// else {
82-
// noticeContainer.addValidationNotice(
83-
// new JsonDuplicateKeyNotice(
84-
// gtfsFilename(), GtfsGeoJsonFeature.FEATURE_ID_FIELD_NAME,
85-
// newEntity.featureId()));
86-
// }
8785
}
8886
}
8987

main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
import org.mobilitydata.gtfsvalidator.table.GtfsStopTime;
2323

2424
/**
25-
* Validates that only one of stop_id, location_group_id or location_id is defined in a given record
26-
* of stop_times.txt
25+
* Validates that only one of `stop_id`, `location_group_id` or `location_id` is defined in a given
26+
* record of `stop_times.txt`.
2727
*
2828
* <p>Generated notice: {@link MissingRequiredFieldNotice}.
2929
*
@@ -51,26 +51,21 @@ public void validate(GtfsStopTime stopTime, NoticeContainer noticeContainer) {
5151
noticeContainer.addValidationNotice(
5252
new MissingRequiredFieldNotice(
5353
GtfsStopTime.FILENAME, stopTime.csvRowNumber(), GtfsStopTime.STOP_ID_FIELD_NAME));
54+
} else if (presenceCount > 1) {
55+
// More than one geography ID is present, but only one is allowed
56+
noticeContainer.addValidationNotice(
57+
new ForbiddenGeographyIdNotice(
58+
stopTime.csvRowNumber(),
59+
stopTime.hasStopId() ? stopTime.stopId() : null,
60+
stopTime.hasLocationGroupId() ? stopTime.locationGroupId() : null,
61+
stopTime.hasLocationId() ? stopTime.locationId() : null));
5462
}
55-
// TODO: Put this back once we are ready to publish this notice.
56-
// else if (presenceCount > 1) {
57-
// // More than one geography ID is present, but only one is allowed
58-
// noticeContainer.addValidationNotice(
59-
// new ForbiddenGeographyIdNotice(
60-
// stopTime.csvRowNumber(),
61-
// stopTime.stopId(),
62-
// stopTime.locationGroupId(),
63-
// stopTime.locationId()));
64-
// }
6563
}
6664

6765
@Override
6866
public boolean shouldCallValidate(ColumnInspector header) {
69-
if (header.hasColumn(GtfsStopTime.STOP_ID_FIELD_NAME)
67+
return header.hasColumn(GtfsStopTime.STOP_ID_FIELD_NAME)
7068
|| header.hasColumn(GtfsStopTime.LOCATION_GROUP_ID_FIELD_NAME)
71-
|| header.hasColumn(GtfsStopTime.LOCATION_ID_FIELD_NAME)) {
72-
return true;
73-
}
74-
return false;
69+
|| header.hasColumn(GtfsStopTime.LOCATION_ID_FIELD_NAME);
7570
}
7671
}
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
/*
2+
* Copyright 2025 MobilityData
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.mobilitydata.gtfsvalidator.validator;
18+
19+
import static org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice.SectionRef.FILE_REQUIREMENTS;
20+
import static org.mobilitydata.gtfsvalidator.notice.SeverityLevel.ERROR;
21+
22+
import java.util.*;
23+
import java.util.stream.Collectors;
24+
import java.util.stream.Stream;
25+
import javax.inject.Inject;
26+
import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice;
27+
import org.mobilitydata.gtfsvalidator.annotation.GtfsValidator;
28+
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;
29+
import org.mobilitydata.gtfsvalidator.notice.ValidationNotice;
30+
import org.mobilitydata.gtfsvalidator.table.*;
31+
32+
/**
33+
* Validates that the feature id from "locations.geojson" is not a duplicate of any stop_id from
34+
* "stops.txt" or location_group_id from "location_group_stops.txt"
35+
*
36+
* <p>Generated notice: {@link DuplicateGeographyIdNotice}.
37+
*/
38+
@GtfsValidator
39+
public class UniqueGeographyIdValidator extends FileValidator {
40+
private final GtfsStopTableContainer stopTable;
41+
private final GtfsLocationGroupsTableContainer locationGroupStopsTable;
42+
private final GtfsGeoJsonFeaturesContainer geoJsonFeatures;
43+
44+
@Inject
45+
UniqueGeographyIdValidator(
46+
GtfsGeoJsonFeaturesContainer geoJsonFeatures,
47+
GtfsStopTableContainer stopTable,
48+
GtfsLocationGroupsTableContainer locationGroupTable) {
49+
this.geoJsonFeatures = geoJsonFeatures;
50+
this.stopTable = stopTable;
51+
this.locationGroupStopsTable = locationGroupTable;
52+
}
53+
54+
@Override
55+
public void validate(NoticeContainer noticeContainer) {
56+
// Collect all ID entries from each file
57+
List<IdEntry> allEntries =
58+
Stream.concat(
59+
geoJsonFeatures.getEntities().stream()
60+
.map(
61+
f ->
62+
new IdEntry(
63+
f.featureId(), GtfsGeoJsonFeature.FILENAME, f.featureIndex())),
64+
Stream.concat(
65+
stopTable.getEntities().stream()
66+
.map(s -> new IdEntry(s.stopId(), GtfsStop.FILENAME, s.csvRowNumber())),
67+
locationGroupStopsTable.getEntities().stream()
68+
.map(
69+
g ->
70+
new IdEntry(
71+
g.locationGroupId(),
72+
GtfsLocationGroupStops.FILENAME,
73+
g.csvRowNumber()))))
74+
.collect(Collectors.toList());
75+
76+
// Group by ID and check for duplicates across files
77+
allEntries.stream()
78+
.collect(Collectors.groupingBy(IdEntry::id))
79+
.forEach(
80+
(id, entries) -> {
81+
if (entries.size() > 1) {
82+
Set<String> uniqueFilenames =
83+
entries.stream().map(IdEntry::filename).collect(Collectors.toSet());
84+
if (uniqueFilenames.size() == 1) return;
85+
noticeContainer.addValidationNotice(
86+
new DuplicateGeographyIdNotice(
87+
id,
88+
getRowNumber(entries, GtfsStop.FILENAME),
89+
getRowNumber(entries, GtfsLocationGroupStops.FILENAME),
90+
getRowNumber(entries, GtfsGeoJsonFeature.FILENAME)));
91+
}
92+
});
93+
}
94+
95+
// Utility method to extract row number by filename
96+
private Integer getRowNumber(List<IdEntry> entries, String filename) {
97+
return entries.stream()
98+
.filter(e -> e.filename().equals(filename))
99+
.map(IdEntry::instanceIndex)
100+
.findFirst()
101+
.orElse(null);
102+
}
103+
104+
// Helper record to hold ID entries
105+
private static class IdEntry {
106+
private final String id;
107+
private final String filename;
108+
private final int instanceIndex;
109+
110+
public IdEntry(String id, String filename, int instanceIndex) {
111+
this.id = id;
112+
this.filename = filename;
113+
this.instanceIndex = instanceIndex;
114+
}
115+
116+
public int instanceIndex() {
117+
return instanceIndex;
118+
}
119+
120+
public String id() {
121+
return id;
122+
}
123+
124+
public String filename() {
125+
return filename;
126+
}
127+
}
128+
129+
/**
130+
* Geography id is duplicated across multiple files.
131+
*
132+
* <p>ID must be unique across all `stops.stop_id`, `locations.geojson` `id`, and
133+
* `location_groups.location_group_id` values.
134+
*/
135+
@GtfsValidationNotice(
136+
severity = ERROR,
137+
files =
138+
@GtfsValidationNotice.FileRefs({
139+
GtfsLocationGroupsSchema.class,
140+
GtfsStopTimeSchema.class,
141+
GtfsLocationGroupsSchema.class
142+
}),
143+
sections = @GtfsValidationNotice.SectionRefs(FILE_REQUIREMENTS))
144+
public static class DuplicateGeographyIdNotice extends ValidationNotice {
145+
146+
/** The geography id that is duplicated. */
147+
private final String geographyId;
148+
149+
/** The csv row number in stops.txt */
150+
private final Integer csvRowNumberA;
151+
152+
/** The csv row number in location_group_stops.txt */
153+
private final Integer csvRowNumberB;
154+
155+
/** The feature index in locations.geojson */
156+
private final Integer featureIndex;
157+
158+
public DuplicateGeographyIdNotice(
159+
String geographyId, Integer csvRowNumberA, Integer csvRowNumberB, Integer featureIndex) {
160+
this.geographyId = geographyId;
161+
this.csvRowNumberA = csvRowNumberA;
162+
this.csvRowNumberB = csvRowNumberB;
163+
this.featureIndex = featureIndex;
164+
}
165+
}
166+
}

0 commit comments

Comments
 (0)