Skip to content

Commit c143caf

Browse files
Make GeoIpProcessor backing database instance pluggable. (elastic#93285)
Introduces two new interfaces: GeoIpDatabase and GeoIpDatabaseProvider. GeoIpDatabaseProvider acts as a generic factory interface for GeoIpDatabase instances. This allows for specifying how database instances are obtained to the processor. GeoIpDatabase encompasses the API footprint for performing GeoIp lookups against a maxmind database. --------- Co-authored-by: Elastic Machine <[email protected]>
1 parent 7f7cf30 commit c143caf

File tree

11 files changed

+306
-147
lines changed

11 files changed

+306
-147
lines changed

docs/changelog/93285.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 93285
2+
summary: Make `GeoIpProcessor` backing database instance pluggable
3+
area: Ingest Node
4+
type: enhancement
5+
issues: []

modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/ReloadingDatabasesWhilePerformingGeoLookupsIT.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ public class ReloadingDatabasesWhilePerformingGeoLookupsIT extends ESTestCase {
6464
public void test() throws Exception {
6565
Path geoIpConfigDir = createTempDir();
6666
Path geoIpTmpDir = createTempDir();
67-
DatabaseNodeService databaseNodeService = createRegistry(geoIpConfigDir, geoIpTmpDir);
6867
ClusterService clusterService = mock(ClusterService.class);
6968
when(clusterService.state()).thenReturn(ClusterState.EMPTY_STATE);
70-
GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseNodeService, clusterService);
69+
DatabaseNodeService databaseNodeService = createRegistry(geoIpConfigDir, geoIpTmpDir, clusterService);
70+
GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseNodeService);
7171
Files.copy(ConfigDatabases.class.getResourceAsStream("/GeoLite2-City-Test.mmdb"), geoIpTmpDir.resolve("GeoLite2-City.mmdb"));
7272
Files.copy(ConfigDatabases.class.getResourceAsStream("/GeoLite2-City-Test.mmdb"), geoIpTmpDir.resolve("GeoLite2-City-Test.mmdb"));
7373
databaseNodeService.updateDatabase("GeoLite2-City.mmdb", "md5", geoIpTmpDir.resolve("GeoLite2-City.mmdb"));
@@ -190,7 +190,8 @@ public void test() throws Exception {
190190
IOUtils.rm(geoIpConfigDir, geoIpTmpDir);
191191
}
192192

193-
private static DatabaseNodeService createRegistry(Path geoIpConfigDir, Path geoIpTmpDir) throws IOException {
193+
private static DatabaseNodeService createRegistry(Path geoIpConfigDir, Path geoIpTmpDir, ClusterService clusterService)
194+
throws IOException {
194195
GeoIpCache cache = new GeoIpCache(0);
195196
ConfigDatabases configDatabases = new ConfigDatabases(geoIpConfigDir, cache);
196197
copyDatabaseFiles(geoIpConfigDir, configDatabases);
@@ -199,9 +200,10 @@ private static DatabaseNodeService createRegistry(Path geoIpConfigDir, Path geoI
199200
mock(Client.class),
200201
cache,
201202
configDatabases,
202-
Runnable::run
203+
Runnable::run,
204+
clusterService
203205
);
204-
databaseNodeService.initialize("nodeId", mock(ResourceWatcherService.class), mock(IngestService.class), mock(ClusterService.class));
206+
databaseNodeService.initialize("nodeId", mock(ResourceWatcherService.class), mock(IngestService.class));
205207
return databaseNodeService;
206208
}
207209

modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.elasticsearch.cluster.routing.IndexRoutingTable;
2222
import org.elasticsearch.cluster.service.ClusterService;
2323
import org.elasticsearch.common.hash.MessageDigests;
24+
import org.elasticsearch.common.logging.HeaderWarning;
2425
import org.elasticsearch.core.CheckedConsumer;
2526
import org.elasticsearch.core.CheckedRunnable;
2627
import org.elasticsearch.core.IOUtils;
@@ -63,6 +64,7 @@
6364
import java.util.zip.GZIPInputStream;
6465

6566
import static org.elasticsearch.core.Strings.format;
67+
import static org.elasticsearch.persistent.PersistentTasksCustomMetadata.getTaskWithId;
6668

6769
/**
6870
* A component that is responsible for making the databases maintained by {@link GeoIpDownloader}
@@ -77,13 +79,13 @@
7779
* 2) For each database check whether the databases have changed
7880
* by comparing the local and remote md5 hash or are locally missing.
7981
* 3) For each database identified in step 2 start downloading the database
80-
* chunks. Each chunks is appended to a tmp file (inside geoip tmp dir) and
82+
* chunks. Each chunk is appended to a tmp file (inside geoip tmp dir) and
8183
* after all chunks have been downloaded, the database is uncompressed and
8284
* renamed to the final filename.After this the database is loaded and
8385
* if there is an old instance of this database then that is closed.
8486
* 4) Cleanup locally loaded databases that are no longer mentioned in {@link GeoIpTaskState}.
8587
*/
86-
public final class DatabaseNodeService implements Closeable {
88+
public final class DatabaseNodeService implements GeoIpDatabaseProvider, Closeable {
8789

8890
private static final Logger LOGGER = LogManager.getLogger(DatabaseNodeService.class);
8991

@@ -93,34 +95,45 @@ public final class DatabaseNodeService implements Closeable {
9395
private Path geoipTmpDirectory;
9496
private final ConfigDatabases configDatabases;
9597
private final Consumer<Runnable> genericExecutor;
98+
private final ClusterService clusterService;
9699
private IngestService ingestService;
97100

98101
private final ConcurrentMap<String, DatabaseReaderLazyLoader> databases = new ConcurrentHashMap<>();
99102

100-
DatabaseNodeService(Environment environment, Client client, GeoIpCache cache, Consumer<Runnable> genericExecutor) {
103+
DatabaseNodeService(
104+
Environment environment,
105+
Client client,
106+
GeoIpCache cache,
107+
Consumer<Runnable> genericExecutor,
108+
ClusterService clusterService
109+
) {
101110
this(
102111
environment.tmpFile(),
103112
new OriginSettingClient(client, IngestService.INGEST_ORIGIN),
104113
cache,
105114
new ConfigDatabases(environment, cache),
106-
genericExecutor
115+
genericExecutor,
116+
clusterService
107117
);
108118
}
109119

110-
DatabaseNodeService(Path tmpDir, Client client, GeoIpCache cache, ConfigDatabases configDatabases, Consumer<Runnable> genericExecutor) {
120+
DatabaseNodeService(
121+
Path tmpDir,
122+
Client client,
123+
GeoIpCache cache,
124+
ConfigDatabases configDatabases,
125+
Consumer<Runnable> genericExecutor,
126+
ClusterService clusterService
127+
) {
111128
this.client = client;
112129
this.cache = cache;
113130
this.geoipTmpBaseDirectory = tmpDir.resolve("geoip-databases");
114131
this.configDatabases = configDatabases;
115132
this.genericExecutor = genericExecutor;
133+
this.clusterService = clusterService;
116134
}
117135

118-
public void initialize(
119-
String nodeId,
120-
ResourceWatcherService resourceWatcher,
121-
IngestService ingestServiceArg,
122-
ClusterService clusterService
123-
) throws IOException {
136+
public void initialize(String nodeId, ResourceWatcherService resourceWatcher, IngestService ingestServiceArg) throws IOException {
124137
configDatabases.initialize(resourceWatcher);
125138
geoipTmpDirectory = geoipTmpBaseDirectory.resolve(nodeId);
126139
Files.walkFileTree(geoipTmpDirectory, new FileVisitor<>() {
@@ -161,7 +174,35 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) {
161174
clusterService.addListener(event -> checkDatabases(event.state()));
162175
}
163176

164-
public DatabaseReaderLazyLoader getDatabase(String name) {
177+
@Override
178+
public Boolean isValid(String databaseFile) {
179+
ClusterState currentState = clusterService.state();
180+
assert currentState != null;
181+
182+
PersistentTasksCustomMetadata.PersistentTask<?> task = getTaskWithId(currentState, GeoIpDownloader.GEOIP_DOWNLOADER);
183+
if (task == null || task.getState() == null) {
184+
return true;
185+
}
186+
GeoIpTaskState state = (GeoIpTaskState) task.getState();
187+
GeoIpTaskState.Metadata metadata = state.getDatabases().get(databaseFile);
188+
// we never remove metadata from cluster state, if metadata is null we deal with built-in database, which is always valid
189+
if (metadata == null) {
190+
return true;
191+
}
192+
193+
boolean valid = metadata.isValid(currentState.metadata().settings());
194+
if (valid && metadata.isCloseToExpiration()) {
195+
HeaderWarning.addWarning(
196+
"database [{}] was not updated for over 25 days, geoip processor" + " will stop working if there is no update for 30 days",
197+
databaseFile
198+
);
199+
}
200+
201+
return valid;
202+
}
203+
204+
// for testing only:
205+
DatabaseReaderLazyLoader getDatabaseReaderLazyLoader(String name) {
165206
// There is a need for reference counting in order to avoid using an instance
166207
// that gets closed while using it. (this can happen during a database update)
167208
while (true) {
@@ -174,6 +215,11 @@ public DatabaseReaderLazyLoader getDatabase(String name) {
174215
}
175216
}
176217

218+
@Override
219+
public GeoIpDatabase getDatabase(String name) {
220+
return getDatabaseReaderLazyLoader(name);
221+
}
222+
177223
List<DatabaseReaderLazyLoader> getAllDatabases() {
178224
List<DatabaseReaderLazyLoader> all = new ArrayList<>(configDatabases.getConfigDatabases().values());
179225
this.databases.forEach((key, value) -> all.add(value));

modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
* Facilitates lazy loading of the database reader, so that when the geoip plugin is installed, but not used,
4242
* no memory is being wasted on the database reader.
4343
*/
44-
class DatabaseReaderLazyLoader implements Closeable {
44+
class DatabaseReaderLazyLoader implements GeoIpDatabase, Closeable {
4545

4646
private static final boolean LOAD_DATABASE_ON_HEAP = Booleans.parseBoolean(System.getProperty("es.geoip.load_db_on_heap", "false"));
4747

@@ -81,7 +81,8 @@ class DatabaseReaderLazyLoader implements Closeable {
8181
* @return the database type
8282
* @throws IOException if an I/O exception occurs reading the database type
8383
*/
84-
final String getDatabaseType() throws IOException {
84+
@Override
85+
public final String getDatabaseType() throws IOException {
8586
if (databaseType.get() == null) {
8687
synchronized (databaseType) {
8788
if (databaseType.get() == null) {
@@ -151,25 +152,29 @@ InputStream databaseInputStream() throws IOException {
151152
}
152153

153154
@Nullable
154-
CityResponse getCity(InetAddress ipAddress) {
155+
@Override
156+
public CityResponse getCity(InetAddress ipAddress) {
155157
return getResponse(ipAddress, DatabaseReader::tryCity);
156158
}
157159

158160
@Nullable
159-
CountryResponse getCountry(InetAddress ipAddress) {
161+
@Override
162+
public CountryResponse getCountry(InetAddress ipAddress) {
160163
return getResponse(ipAddress, DatabaseReader::tryCountry);
161164
}
162165

163166
@Nullable
164-
AsnResponse getAsn(InetAddress ipAddress) {
167+
@Override
168+
public AsnResponse getAsn(InetAddress ipAddress) {
165169
return getResponse(ipAddress, DatabaseReader::tryAsn);
166170
}
167171

168172
boolean preLookup() {
169173
return currentUsages.updateAndGet(current -> current < 0 ? current : current + 1) > 0;
170174
}
171175

172-
void postLookup() throws IOException {
176+
@Override
177+
public void release() throws IOException {
173178
if (currentUsages.updateAndGet(current -> current > 0 ? current - 1 : current + 1) == -1) {
174179
doClose();
175180
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.ingest.geoip;
10+
11+
import com.maxmind.geoip2.model.AsnResponse;
12+
import com.maxmind.geoip2.model.CityResponse;
13+
import com.maxmind.geoip2.model.CountryResponse;
14+
15+
import org.elasticsearch.core.Nullable;
16+
17+
import java.io.IOException;
18+
import java.net.InetAddress;
19+
20+
/**
21+
* Provides a uniform interface for interacting with various GeoIP databases.
22+
*/
23+
public interface GeoIpDatabase {
24+
25+
/**
26+
* @return the database type as it is detailed in the database file metadata
27+
* @throws IOException if the database file could not be read or the data could not be accessed
28+
*/
29+
String getDatabaseType() throws IOException;
30+
31+
/**
32+
* @param ipAddress the IP address to look up
33+
* @return a response containing the city data for the given address if it exists, or <code>null</code> if it could not be found
34+
* @throws UnsupportedOperationException may be thrown if the implementation does not support retrieving city data
35+
*/
36+
@Nullable
37+
CityResponse getCity(InetAddress ipAddress);
38+
39+
/**
40+
* @param ipAddress the IP address to look up
41+
* @return a response containing the country data for the given address if it exists, or <code>null</code> if it could not be found
42+
* @throws UnsupportedOperationException may be thrown if the implementation does not support retrieving country data
43+
*/
44+
@Nullable
45+
CountryResponse getCountry(InetAddress ipAddress);
46+
47+
/**
48+
* @param ipAddress the IP address to look up
49+
* @return a response containing the Autonomous System Number for the given address if it exists, or <code>null</code> if it could not
50+
* be found
51+
* @throws UnsupportedOperationException may be thrown if the implementation does not support retrieving ASN data
52+
*/
53+
@Nullable
54+
AsnResponse getAsn(InetAddress ipAddress);
55+
56+
/**
57+
* Releases the current database object. Called after processing a single document. Databases should be closed or returned to a
58+
* resource pool. No further interactions should be expected.
59+
* @throws IOException if the implementation encounters any problem while cleaning up
60+
*/
61+
void release() throws IOException;
62+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.ingest.geoip;
10+
11+
/**
12+
* Provides construction and initialization logic for {@link GeoIpDatabase} instances.
13+
*/
14+
public interface GeoIpDatabaseProvider {
15+
16+
/**
17+
* Determines if the given database name corresponds to an expired database. Expired databases will not be loaded.
18+
* <br/><br/>
19+
* Verifying database expiration is left to each provider implementation to determine. A return value of <code>false</code> does not
20+
* preclude the possibility of a provider returning <code>true</code> in the future.
21+
*
22+
* @param name the name of the database to provide.
23+
* @return <code>false</code> IFF the requested database file is expired,
24+
* <code>true</code> for all other cases (including unknown file name, file missing, wrong database type, etc).
25+
*/
26+
Boolean isValid(String name);
27+
28+
/**
29+
* @param name the name of the database to provide. Default database names that should always be supported are listed in
30+
* {@link IngestGeoIpPlugin#DEFAULT_DATABASE_FILENAMES}.
31+
* @return a ready-to-use database instance, or <code>null</code> if no database could be loaded.
32+
*/
33+
GeoIpDatabase getDatabase(String name);
34+
}

0 commit comments

Comments
 (0)