Skip to content

Commit c4bb9b3

Browse files
authored
Optimize some per-document hot paths in the geoip processor (#120824)
1 parent a0f1856 commit c4bb9b3

File tree

4 files changed

+46
-8
lines changed

4 files changed

+46
-8
lines changed

docs/changelog/120824.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 120824
2+
summary: Optimize some per-document hot paths in the geoip processor
3+
area: Ingest Node
4+
type: enhancement
5+
issues: []

modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
import static org.elasticsearch.ingest.geoip.GeoIpTestUtils.copyDefaultDatabases;
7171
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
7272
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse;
73+
import static org.hamcrest.Matchers.allOf;
7374
import static org.hamcrest.Matchers.anEmptyMap;
7475
import static org.hamcrest.Matchers.contains;
7576
import static org.hamcrest.Matchers.containsInAnyOrder;
@@ -172,10 +173,15 @@ public void testInvalidTimestamp() throws Exception {
172173
for (Path geoIpTmpDir : geoIpTmpDirs) {
173174
try (Stream<Path> files = Files.list(geoIpTmpDir)) {
174175
Set<String> names = files.map(f -> f.getFileName().toString()).collect(Collectors.toSet());
175-
assertThat(names, not(hasItem("GeoLite2-ASN.mmdb")));
176-
assertThat(names, not(hasItem("GeoLite2-City.mmdb")));
177-
assertThat(names, not(hasItem("GeoLite2-Country.mmdb")));
178-
assertThat(names, not(hasItem("MyCustomGeoLite2-City.mmdb")));
176+
assertThat(
177+
names,
178+
allOf(
179+
not(hasItem("GeoLite2-ASN.mmdb")),
180+
not(hasItem("GeoLite2-City.mmdb")),
181+
not(hasItem("GeoLite2-Country.mmdb")),
182+
not(hasItem("MyCustomGeoLite2-City.mmdb"))
183+
)
184+
);
179185
}
180186
}
181187
});

modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ public class DatabaseReaderLazyLoader implements IpDatabase {
5353
private volatile boolean deleteDatabaseFileOnShutdown;
5454
private final AtomicInteger currentUsages = new AtomicInteger(0);
5555

56+
// it seems insane, especially if you read the code for UnixPath, but calling toString on a path in advance here is faster enough
57+
// than calling it on every call to cache.putIfAbsent that it makes the slight additional internal complication worth it
58+
private final String cachedDatabasePathToString;
59+
5660
DatabaseReaderLazyLoader(GeoIpCache cache, Path databasePath, String md5) {
5761
this.cache = cache;
5862
this.databasePath = Objects.requireNonNull(databasePath);
@@ -61,6 +65,9 @@ public class DatabaseReaderLazyLoader implements IpDatabase {
6165
this.databaseReader = new SetOnce<>();
6266
this.databaseType = new SetOnce<>();
6367
this.buildDate = new SetOnce<>();
68+
69+
// cache the toString on construction
70+
this.cachedDatabasePathToString = databasePath.toString();
6471
}
6572

6673
/**
@@ -99,7 +106,7 @@ int current() {
99106
@Override
100107
@Nullable
101108
public <RESPONSE> RESPONSE getResponse(String ipAddress, CheckedBiFunction<Reader, String, RESPONSE, Exception> responseProvider) {
102-
return cache.putIfAbsent(ipAddress, databasePath.toString(), ip -> {
109+
return cache.putIfAbsent(ipAddress, cachedDatabasePathToString, ip -> {
103110
try {
104111
return responseProvider.apply(get(), ipAddress);
105112
} catch (Exception e) {

modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -206,12 +206,32 @@ public static Metadata fromXContent(XContentParser parser) {
206206
}
207207

208208
public boolean isCloseToExpiration() {
209-
return Instant.ofEpochMilli(lastCheck).isBefore(Instant.now().minus(25, ChronoUnit.DAYS));
209+
final Instant now = Instant.ofEpochMilli(System.currentTimeMillis()); // millisecond precision is sufficient (and faster)
210+
return Instant.ofEpochMilli(lastCheck).isBefore(now.minus(25, ChronoUnit.DAYS));
210211
}
211212

213+
// these constants support the micro optimization below, see that note
214+
private static final TimeValue THIRTY_DAYS = TimeValue.timeValueDays(30);
215+
private static final long THIRTY_DAYS_MILLIS = THIRTY_DAYS.millis();
216+
212217
public boolean isNewEnough(Settings settings) {
213-
TimeValue valid = settings.getAsTime("ingest.geoip.database_validity", TimeValue.timeValueDays(30));
214-
return Instant.ofEpochMilli(lastCheck).isAfter(Instant.now().minus(valid.getMillis(), ChronoUnit.MILLIS));
218+
// micro optimization: this looks a little silly, but the expected case is that database_validity is only used in tests.
219+
// we run this code on every document, though, so the argument checking and other bits that getAsTime does is enough
220+
// to show up in a flame graph.
221+
222+
// if you grep for "ingest.geoip.database_validity" and you'll see that it's not a 'real' setting -- it's only defined in
223+
// AbstractGeoIpIT, that's why it's an inline string constant here and no some static final, and also why it cannot
224+
// be the case that this setting exists in a real running cluster
225+
226+
final long valid;
227+
if (settings.hasValue("ingest.geoip.database_validity")) {
228+
valid = settings.getAsTime("ingest.geoip.database_validity", THIRTY_DAYS).millis();
229+
} else {
230+
valid = THIRTY_DAYS_MILLIS;
231+
}
232+
233+
final Instant now = Instant.ofEpochMilli(System.currentTimeMillis()); // millisecond precision is sufficient (and faster)
234+
return Instant.ofEpochMilli(lastCheck).isAfter(now.minus(valid, ChronoUnit.MILLIS));
215235
}
216236

217237
@Override

0 commit comments

Comments
 (0)