From 29eae1ddbd41784697059b031a1185b3cb6909e0 Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Mon, 11 Nov 2024 15:45:27 -0500 Subject: [PATCH 1/8] Reword the geoip docs ever so slightly --- .../reference/ingest/processors/geoip.asciidoc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/reference/ingest/processors/geoip.asciidoc b/docs/reference/ingest/processors/geoip.asciidoc index 2eff56f87e826..d89bd9d7657bc 100644 --- a/docs/reference/ingest/processors/geoip.asciidoc +++ b/docs/reference/ingest/processors/geoip.asciidoc @@ -28,7 +28,7 @@ If you would like to have {es} download database files directly from Maxmind usi license key, see <>. If {es} can't connect to the endpoint for 30 days all updated databases will become -invalid. {es} will stop enriching documents with geoip data and will add `tags: ["_geoip_expired_database"]` +invalid. {es} will stop enriching documents with ip geolocation data and will add `tags: ["_geoip_expired_database"]` field instead. [[using-ingest-geoip]] @@ -40,11 +40,11 @@ field instead. |====== | Name | Required | Default | Description | `field` | yes | - | The field to get the IP address from for the geographical lookup. -| `target_field` | no | geoip | The field that will hold the geographical information looked up from the MaxMind database. +| `target_field` | no | geoip | The field that will hold the geographical information looked up from the database. | `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb), or the name of a supported database file in the `ingest-geoip` config directory, or the name of a <> (with the `.mmdb` suffix appended). 
-| `properties` | no | [`continent_name`, `country_iso_code`, `country_name`, `region_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the geoip lookup. +| `properties` | no | [`continent_name`, `country_iso_code`, `country_name`, `region_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the ip geolocation lookup. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document -| `first_only` | no | `true` | If `true` only first found geoip data will be returned, even if `field` contains array +| `first_only` | no | `true` | If `true` only first found ip geolocation data, will be returned, even if `field` contains array | `download_database_on_pipeline_creation` | no | `true` | If `true` (and if `ingest.geoip.downloader.eager.download` is `false`), the missing database is downloaded when the pipeline is created. Else, the download is triggered by when the pipeline is used as the `default_pipeline` or `final_pipeline` in an index. |====== @@ -87,7 +87,7 @@ Here is an example that uses the default city database and adds the geographical -------------------------------------------------- PUT _ingest/pipeline/geoip { - "description" : "Add geoip info", + "description" : "Add ip geolocation info", "processors" : [ { "geoip" : { @@ -138,7 +138,7 @@ this database is downloaded automatically. 
So this: -------------------------------------------------- PUT _ingest/pipeline/geoip { - "description" : "Add geoip info", + "description" : "Add ip geolocation info", "processors" : [ { "geoip" : { @@ -190,7 +190,7 @@ cannot be found: -------------------------------------------------- PUT _ingest/pipeline/geoip { - "description" : "Add geoip info", + "description" : "Add ip geolocation info", "processors" : [ { "geoip" : { @@ -256,7 +256,7 @@ PUT my_ip_locations -------------------------------------------------- PUT _ingest/pipeline/geoip { - "description" : "Add geoip info", + "description" : "Add ip geolocation info", "processors" : [ { "geoip" : { @@ -458,7 +458,7 @@ each node's <> at `$ES_TMPDIR/geoip-databases/ Date: Mon, 11 Nov 2024 16:04:30 -0500 Subject: [PATCH 2/8] Reference the new APIs rather than the old ones --- docs/reference/ingest/processors/geoip.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/ingest/processors/geoip.asciidoc b/docs/reference/ingest/processors/geoip.asciidoc index d89bd9d7657bc..b5817d8d636bf 100644 --- a/docs/reference/ingest/processors/geoip.asciidoc +++ b/docs/reference/ingest/processors/geoip.asciidoc @@ -25,7 +25,7 @@ If your cluster can't connect to the Elastic GeoIP endpoint or you want to manage your own updates, see <>. If you would like to have {es} download database files directly from Maxmind using your own provided -license key, see <>. +license key, see <>. If {es} can't connect to the endpoint for 30 days all updated databases will become invalid. {es} will stop enriching documents with ip geolocation data and will add `tags: ["_geoip_expired_database"]` @@ -41,7 +41,7 @@ field instead. | Name | Required | Default | Description | `field` | yes | - | The field to get the IP address from for the geographical lookup. | `target_field` | no | geoip | The field that will hold the geographical information looked up from the database. 
-| `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb), or the name of a supported database file in the `ingest-geoip` config directory, or the name of a <> (with the `.mmdb` suffix appended). +| `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb), or the name of a supported database file in the `ingest-geoip` config directory, or the name of a <> (with the `.mmdb` suffix appended). | `properties` | no | [`continent_name`, `country_iso_code`, `country_name`, `region_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the ip geolocation lookup. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document | `first_only` | no | `true` | If `true` only first found ip geolocation data, will be returned, even if `field` contains array From 02f18d540330d198b1d019cec1f47b5f6aa2d864 Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Mon, 11 Nov 2024 16:20:35 -0500 Subject: [PATCH 3/8] Some aspects of the system apply to both processors --- docs/reference/ingest/processors/geoip.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/ingest/processors/geoip.asciidoc b/docs/reference/ingest/processors/geoip.asciidoc index b5817d8d636bf..455b254576809 100644 --- a/docs/reference/ingest/processors/geoip.asciidoc +++ b/docs/reference/ingest/processors/geoip.asciidoc @@ -13,7 +13,7 @@ ASN IP geolocation databases from http://dev.maxmind.com/geoip/geoip2/geolite2/[ CC BY-SA 4.0 license. 
It automatically downloads these databases if your nodes can connect to `storage.googleapis.com` domain and either: * `ingest.geoip.downloader.eager.download` is set to true -* your cluster has at least one pipeline with a `geoip` processor +* your cluster has at least one pipeline with a `geoip` or `ip_location` processor {es} automatically downloads updates for these databases from the Elastic GeoIP endpoint: @@ -429,7 +429,7 @@ The `geoip` processor supports the following setting: The maximum number of results that should be cached. Defaults to `1000`. -Note that these settings are node settings and apply to all `geoip` processors, i.e. there is one cache for all defined `geoip` processors. +Note that these settings are node settings and apply to all `geoip` and `ip_location` processors, i.e. there is a single cache for all such processors. [[geoip-cluster-settings]] ===== Cluster settings From bac31f99550285afa63eb594e1bf7aae0cd21880 Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Mon, 11 Nov 2024 15:37:43 -0500 Subject: [PATCH 4/8] Copy the geoip docs to a new name --- .../ingest/processors/ip-location.asciidoc | 468 ++++++++++++++++++ 1 file changed, 468 insertions(+) create mode 100644 docs/reference/ingest/processors/ip-location.asciidoc diff --git a/docs/reference/ingest/processors/ip-location.asciidoc b/docs/reference/ingest/processors/ip-location.asciidoc new file mode 100644 index 0000000000000..455b254576809 --- /dev/null +++ b/docs/reference/ingest/processors/ip-location.asciidoc @@ -0,0 +1,468 @@ +[[geoip-processor]] +=== GeoIP processor +++++ +GeoIP +++++ + +The `geoip` processor adds information about the geographical location of an +IPv4 or IPv6 address. + +[[geoip-automatic-updates]] +By default, the processor uses the GeoLite2 City, GeoLite2 Country, and GeoLite2 +ASN IP geolocation databases from http://dev.maxmind.com/geoip/geoip2/geolite2/[MaxMind], shared under the +CC BY-SA 4.0 license. 
It automatically downloads these databases if your nodes can connect to `storage.googleapis.com` domain and either: + +* `ingest.geoip.downloader.eager.download` is set to true +* your cluster has at least one pipeline with a `geoip` or `ip_location` processor + +{es} automatically downloads updates for these databases from the Elastic GeoIP +endpoint: +https://geoip.elastic.co/v1/database?elastic_geoip_service_tos=agree[https://geoip.elastic.co/v1/database]. +To get download statistics for these updates, use the <>. + +If your cluster can't connect to the Elastic GeoIP endpoint or you want to +manage your own updates, see <>. + +If you would like to have {es} download database files directly from Maxmind using your own provided +license key, see <>. + +If {es} can't connect to the endpoint for 30 days all updated databases will become +invalid. {es} will stop enriching documents with ip geolocation data and will add `tags: ["_geoip_expired_database"]` +field instead. + +[[using-ingest-geoip]] +==== Using the `geoip` Processor in a Pipeline + +[[ingest-geoip-options]] +.`geoip` options +[options="header"] +|====== +| Name | Required | Default | Description +| `field` | yes | - | The field to get the IP address from for the geographical lookup. +| `target_field` | no | geoip | The field that will hold the geographical information looked up from the database. +| `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb), or the name of a supported database file in the `ingest-geoip` config directory, or the name of a <> (with the `.mmdb` suffix appended). +| `properties` | no | [`continent_name`, `country_iso_code`, `country_name`, `region_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the ip geolocation lookup. 
+| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document +| `first_only` | no | `true` | If `true` only first found ip geolocation data, will be returned, even if `field` contains array +| `download_database_on_pipeline_creation` | no | `true` | If `true` (and if `ingest.geoip.downloader.eager.download` is `false`), the missing database is downloaded when the pipeline is created. Else, the download is triggered by when the pipeline is used as the `default_pipeline` or `final_pipeline` in an index. +|====== + +*Depends on what is available in `database_file`: + +* If a GeoLite2 City or GeoIP2 City database is used, then the following fields may be added under the `target_field`: `ip`, +`country_iso_code`, `country_name`, `country_in_european_union`, `registered_country_iso_code`, `registered_country_name`, `registered_country_in_european_union`, +`continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `postal_code`, `timezone`, +`location`, and `accuracy_radius`. The fields actually added depend on what has been found and which properties were configured in `properties`. +* If a GeoLite2 Country or GeoIP2 Country database is used, then the following fields may be added under the `target_field`: `ip`, +`country_iso_code`, `country_name`, `country_in_european_union`, `registered_country_iso_code`, `registered_country_name`, `registered_country_in_european_union`, +`continent_code`, and `continent_name`. The fields actually added depend on what has been found +and which properties were configured in `properties`. +* If the GeoLite2 ASN database is used, then the following fields may be added under the `target_field`: `ip`, +`asn`, `organization_name` and `network`. The fields actually added depend on what has been found and which properties were configured +in `properties`. 
+* If the GeoIP2 Anonymous IP database is used, then the following fields may be added under the `target_field`: `ip`, +`hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, and `residential_proxy`. The fields actually added +depend on what has been found and which properties were configured in `properties`. +* If the GeoIP2 Connection Type database is used, then the following fields may be added under the `target_field`: `ip`, and +`connection_type`. The fields actually added depend on what has been found and which properties were configured in `properties`. +* If the GeoIP2 Domain database is used, then the following fields may be added under the `target_field`: `ip`, and `domain`. +The fields actually added depend on what has been found and which properties were configured in `properties`. +* If the GeoIP2 ISP database is used, then the following fields may be added under the `target_field`: `ip`, `asn`, +`organization_name`, `network`, `isp`, `isp_organization_name`, `mobile_country_code`, and `mobile_network_code`. The fields actually added +depend on what has been found and which properties were configured in `properties`. +* If the GeoIP2 Enterprise database is used, then the following fields may be added under the `target_field`: `ip`, +`country_iso_code`, `country_name`, `country_in_european_union`, `registered_country_iso_code`, `registered_country_name`, `registered_country_in_european_union`, +`continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `postal_code`, `timezone`, +`location`, `accuracy_radius`, `country_confidence`, `city_confidence`, `postal_confidence`, `asn`, `organization_name`, `network`, +`hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, +`residential_proxy`, `domain`, `isp`, `isp_organization_name`, `mobile_country_code`, `mobile_network_code`, `user_type`, and +`connection_type`. 
The fields actually added depend on what has been found and which properties were configured in `properties`. + +preview::["Do not use the GeoIP2 Anonymous IP, GeoIP2 Connection Type, GeoIP2 Domain, GeoIP2 ISP, and GeoIP2 Enterprise databases in production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] + +Here is an example that uses the default city database and adds the geographical information to the `geoip` field based on the `ip` field: + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/geoip +{ + "description" : "Add ip geolocation info", + "processors" : [ + { + "geoip" : { + "field" : "ip" + } + } + ] +} +PUT my-index-000001/_doc/my_id?pipeline=geoip +{ + "ip": "89.160.20.128" +} +GET my-index-000001/_doc/my_id +-------------------------------------------------- + +Which returns: + +[source,console-result] +-------------------------------------------------- +{ + "found": true, + "_index": "my-index-000001", + "_id": "my_id", + "_version": 1, + "_seq_no": 55, + "_primary_term": 1, + "_source": { + "ip": "89.160.20.128", + "geoip": { + "continent_name": "Europe", + "country_name": "Sweden", + "country_iso_code": "SE", + "city_name" : "Linköping", + "region_iso_code" : "SE-E", + "region_name" : "Östergötland County", + "location": { "lat": 58.4167, "lon": 15.6167 } + } + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term":1/"_primary_term" : $body._primary_term/] + +Here is an example that uses the default country database and adds the +geographical information to the `geo` field based on the `ip` field. Note that +this database is downloaded automatically. 
So this: + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/geoip +{ + "description" : "Add ip geolocation info", + "processors" : [ + { + "geoip" : { + "field" : "ip", + "target_field" : "geo", + "database_file" : "GeoLite2-Country.mmdb" + } + } + ] +} +PUT my-index-000001/_doc/my_id?pipeline=geoip +{ + "ip": "89.160.20.128" +} +GET my-index-000001/_doc/my_id +-------------------------------------------------- + +returns this: + +[source,console-result] +-------------------------------------------------- +{ + "found": true, + "_index": "my-index-000001", + "_id": "my_id", + "_version": 1, + "_seq_no": 65, + "_primary_term": 1, + "_source": { + "ip": "89.160.20.128", + "geo": { + "continent_name": "Europe", + "country_name": "Sweden", + "country_iso_code": "SE" + } + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] + + +Not all IP addresses find geo information from the database. When this +occurs, no `target_field` is inserted into the document. 
+ +Here is an example of what documents will be indexed as when information for "80.231.5.0" +cannot be found: + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/geoip +{ + "description" : "Add ip geolocation info", + "processors" : [ + { + "geoip" : { + "field" : "ip" + } + } + ] +} + +PUT my-index-000001/_doc/my_id?pipeline=geoip +{ + "ip": "80.231.5.0" +} + +GET my-index-000001/_doc/my_id +-------------------------------------------------- + +Which returns: + +[source,console-result] +-------------------------------------------------- +{ + "_index" : "my-index-000001", + "_id" : "my_id", + "_version" : 1, + "_seq_no" : 71, + "_primary_term": 1, + "found" : true, + "_source" : { + "ip" : "80.231.5.0" + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_seq_no" : \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] + +[[ingest-geoip-mappings-note]] +===== Recognizing Location as a Geopoint +Although this processor enriches your document with a `location` field containing +the estimated latitude and longitude of the IP address, this field will not be +indexed as a {ref}/geo-point.html[`geo_point`] type in Elasticsearch without explicitly defining it +as such in the mapping. 
+ +You can use the following mapping for the example index above: + +[source,console] +-------------------------------------------------- +PUT my_ip_locations +{ + "mappings": { + "properties": { + "geoip": { + "properties": { + "location": { "type": "geo_point" } + } + } + } + } +} +-------------------------------------------------- + +//// +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/geoip +{ + "description" : "Add ip geolocation info", + "processors" : [ + { + "geoip" : { + "field" : "ip" + } + } + ] +} + +PUT my_ip_locations/_doc/1?refresh=true&pipeline=geoip +{ + "ip": "89.160.20.128" +} + +GET /my_ip_locations/_search +{ + "query": { + "bool": { + "must": { + "match_all": {} + }, + "filter": { + "geo_distance": { + "distance": "1m", + "geoip.location": { + "lon": 15.6167, + "lat": 58.4167 + } + } + } + } + } +} +-------------------------------------------------- +// TEST[continued] + +[source,console-result] +-------------------------------------------------- +{ + "took" : 3, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value": 1, + "relation": "eq" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "my_ip_locations", + "_id" : "1", + "_score" : 1.0, + "_source" : { + "geoip" : { + "continent_name" : "Europe", + "country_name" : "Sweden", + "country_iso_code" : "SE", + "city_name" : "Linköping", + "region_iso_code" : "SE-E", + "region_name" : "Östergötland County", + "location" : { + "lon" : 15.6167, + "lat" : 58.4167 + } + }, + "ip" : "89.160.20.128" + } + } + ] + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"took" : 3/"took" : $body.took/] +//// + +[[manage-geoip-database-updates]] +==== Manage your own IP geolocation database updates + +If you can't <> your IP geolocation databases +from the Elastic endpoint, you have a few other options: + +* <> +* <> +* <> + 
+[[use-proxy-geoip-endpoint]] +**Use a proxy endpoint** + +If you can't connect directly to the Elastic GeoIP endpoint, consider setting up +a secure proxy. You can then specify the proxy endpoint URL in the +<> setting +of each node’s `elasticsearch.yml` file. + +In a strict setup the following domains may need to be added to the allowed +domains list: + +* `geoip.elastic.co` +* `storage.googleapis.com` + +[[use-custom-geoip-endpoint]] +**Use a custom endpoint** + +You can create a service that mimics the Elastic GeoIP endpoint. You can then +get automatic updates from this service. + +. Download your `.mmdb` database files from the +http://dev.maxmind.com/geoip/geoip2/geolite2[MaxMind site]. + +. Copy your database files to a single directory. + +. From your {es} directory, run: ++ +[source,sh] +---- +./bin/elasticsearch-geoip -s my/source/dir [-t target/directory] +---- + +. Serve the static database files from your directory. For example, you can use +Docker to serve the files from an nginx server: ++ +[source,sh] +---- +docker run -v my/source/dir:/usr/share/nginx/html:ro nginx +---- + +. Specify the service's endpoint URL in the +<> setting +of each node’s `elasticsearch.yml` file. ++ +By default, {es} checks the endpoint for updates every three days. To use +another polling interval, use the <> to set +<>. + +[[manually-update-geoip-databases]] +**Manually update your IP geolocation databases** + +. Use the <> to set +`ingest.geoip.downloader.enabled` to `false`. This disables automatic updates +that may overwrite your database changes. This also deletes all downloaded +databases. + +. Download your `.mmdb` database files from the +http://dev.maxmind.com/geoip/geoip2/geolite2[MaxMind site]. ++ +You can also use custom city, country, and ASN `.mmdb` files. These files must +be uncompressed. The type (city, country, or ASN) will be pulled from the file +metadata, so the filename does not matter. + +. 
On {ess} deployments upload database using +a {cloud}/ec-custom-bundles.html[custom bundle]. + +. On self-managed deployments copy the database files to `$ES_CONFIG/ingest-geoip`. + +. In your `geoip` processors, configure the `database_file` parameter to use a +custom database file. + +[[ingest-geoip-settings]] +===== Node Settings + +The `geoip` processor supports the following setting: + +`ingest.geoip.cache_size`:: + + The maximum number of results that should be cached. Defaults to `1000`. + +Note that these settings are node settings and apply to all `geoip` and `ip_location` processors, i.e. there is a single cache for all such processors. + +[[geoip-cluster-settings]] +===== Cluster settings + +[[ingest-geoip-downloader-enabled]] +`ingest.geoip.downloader.enabled`:: +(<>, Boolean) +If `true`, {es} automatically downloads and manages updates for IP geolocation databases +from the `ingest.geoip.downloader.endpoint`. If `false`, {es} does not download +updates and deletes all downloaded databases. Defaults to `true`. + +[[ingest-geoip-downloader-eager-download]] +`ingest.geoip.downloader.eager.download`:: +(<>, Boolean) +If `true`, {es} downloads IP geolocation databases immediately, regardless of whether a +pipeline exists with a geoip processor. If `false`, {es} only begins downloading +the databases if a pipeline with a geoip processor exists or is added. Defaults +to `false`. + +[[ingest-geoip-downloader-endpoint]] +`ingest.geoip.downloader.endpoint`:: +(<>, string) +Endpoint URL used to download updates for IP geolocation databases. For example, `https://myDomain.com/overview.json`. +Defaults to `https://geoip.elastic.co/v1/database`. {es} stores downloaded database files in +each node's <> at `$ES_TMPDIR/geoip-databases/`. +Note that {es} will make a GET request to `${ingest.geoip.downloader.endpoint}?elastic_geoip_service_tos=agree`, +expecting the list of metadata about databases typically found in `overview.json`. 
+ +The downloader uses the JDK's builtin cacerts. If you're using a custom endpoint, add the custom https endpoint cacert(s) to the JDK's truststore. + +[[ingest-geoip-downloader-poll-interval]] +`ingest.geoip.downloader.poll.interval`:: +(<>, <>) +How often {es} checks for IP geolocation database updates at the +`ingest.geoip.downloader.endpoint`. Must be greater than `1d` (one day). Defaults +to `3d` (three days). From 8f16fcaa7ab68feb6f913a7bbcaea5b95429879a Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Mon, 11 Nov 2024 15:48:05 -0500 Subject: [PATCH 5/8] Add the new docs into the processor list --- docs/reference/ingest/processors.asciidoc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/reference/ingest/processors.asciidoc b/docs/reference/ingest/processors.asciidoc index 8f7cef06d12a0..f4fcc0fc84d0d 100644 --- a/docs/reference/ingest/processors.asciidoc +++ b/docs/reference/ingest/processors.asciidoc @@ -77,7 +77,10 @@ Computes a hash of the document’s content. Converts geo-grid definitions of grid tiles or cells to regular bounding boxes or polygons which describe their shape. <>:: -Adds information about the geographical location of an IPv4 or IPv6 address. +Adds information about the geographical location of an IPv4 or IPv6 address from a MaxMind database. + +<>:: +Adds information about the geographical location of an IPv4 or IPv6 address from an IP geolocation database. <>:: Calculates the network direction given a source IP address, destination IP address, and a list of internal networks. 
@@ -245,6 +248,7 @@ include::processors/grok.asciidoc[] include::processors/gsub.asciidoc[] include::processors/html_strip.asciidoc[] include::processors/inference.asciidoc[] +include::processors/ip-location.asciidoc[] include::processors/join.asciidoc[] include::processors/json.asciidoc[] include::processors/kv.asciidoc[] From 6ff9430dcd71c17464526cc6d3ed8230a887e56b Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Mon, 11 Nov 2024 16:00:24 -0500 Subject: [PATCH 6/8] Adjust slugs and code samples --- .../ingest/processors/ip-location.asciidoc | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/docs/reference/ingest/processors/ip-location.asciidoc b/docs/reference/ingest/processors/ip-location.asciidoc index 455b254576809..155092ebccdbf 100644 --- a/docs/reference/ingest/processors/ip-location.asciidoc +++ b/docs/reference/ingest/processors/ip-location.asciidoc @@ -1,13 +1,13 @@ -[[geoip-processor]] -=== GeoIP processor +[[ip-location-processor]] +=== IP location processor ++++ -GeoIP +IP Location ++++ -The `geoip` processor adds information about the geographical location of an +The `ip_location` processor adds information about the geographical location of an IPv4 or IPv6 address. -[[geoip-automatic-updates]] +[[ip-location-automatic-updates]] By default, the processor uses the GeoLite2 City, GeoLite2 Country, and GeoLite2 ASN IP geolocation databases from http://dev.maxmind.com/geoip/geoip2/geolite2/[MaxMind], shared under the CC BY-SA 4.0 license. It automatically downloads these databases if your nodes can connect to `storage.googleapis.com` domain and either: @@ -28,19 +28,19 @@ If you would like to have {es} download database files directly from Maxmind usi license key, see <>. If {es} can't connect to the endpoint for 30 days all updated databases will become -invalid. {es} will stop enriching documents with ip geolocation data and will add `tags: ["_geoip_expired_database"]` +invalid. 
{es} will stop enriching documents with ip geolocation data and will add `tags: ["_ip_location_expired_database"]` field instead. -[[using-ingest-geoip]] -==== Using the `geoip` Processor in a Pipeline +[[using-ingest-ip-location]] +==== Using the `ip_location` Processor in a Pipeline -[[ingest-geoip-options]] -.`geoip` options +[[ingest-ip-location-options]] +.`ip_location` options [options="header"] |====== | Name | Required | Default | Description | `field` | yes | - | The field to get the IP address from for the geographical lookup. -| `target_field` | no | geoip | The field that will hold the geographical information looked up from the database. +| `target_field` | no | ip_location | The field that will hold the geographical information looked up from the database. | `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb), or the name of a supported database file in the `ingest-geoip` config directory, or the name of a <> (with the `.mmdb` suffix appended). | `properties` | no | [`continent_name`, `country_iso_code`, `country_name`, `region_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the ip geolocation lookup. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document | `first_only` | no | `true` | If `true` only first found ip geolocation data, will be returned, even if `field` contains array @@ -81,22 +81,22 @@ depend on what has been found and which properties were configured in `propertie preview::["Do not use the GeoIP2 Anonymous IP, GeoIP2 Connection Type, GeoIP2 Domain, GeoIP2 ISP, and GeoIP2 Enterprise databases in production environments. This functionality is in technical preview and may be changed or removed in a future release. 
Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] -Here is an example that uses the default city database and adds the geographical information to the `geoip` field based on the `ip` field: +Here is an example that uses the default city database and adds the geographical information to the `ip_location` field based on the `ip` field: [source,console] -------------------------------------------------- -PUT _ingest/pipeline/geoip +PUT _ingest/pipeline/ip_location { "description" : "Add ip geolocation info", "processors" : [ { - "geoip" : { + "ip_location" : { "field" : "ip" } } ] } -PUT my-index-000001/_doc/my_id?pipeline=geoip +PUT my-index-000001/_doc/my_id?pipeline=ip_location { "ip": "89.160.20.128" } @@ -116,7 +116,7 @@ Which returns: "_primary_term": 1, "_source": { "ip": "89.160.20.128", - "geoip": { + "ip_location": { "continent_name": "Europe", "country_name": "Sweden", "country_iso_code": "SE", @@ -136,12 +136,12 @@ this database is downloaded automatically. 
So this: [source,console] -------------------------------------------------- -PUT _ingest/pipeline/geoip +PUT _ingest/pipeline/ip_location { "description" : "Add ip geolocation info", "processors" : [ { - "geoip" : { + "ip_location" : { "field" : "ip", "target_field" : "geo", "database_file" : "GeoLite2-Country.mmdb" @@ -149,7 +149,7 @@ PUT _ingest/pipeline/geoip } ] } -PUT my-index-000001/_doc/my_id?pipeline=geoip +PUT my-index-000001/_doc/my_id?pipeline=ip_location { "ip": "89.160.20.128" } @@ -188,19 +188,19 @@ cannot be found: [source,console] -------------------------------------------------- -PUT _ingest/pipeline/geoip +PUT _ingest/pipeline/ip_location { "description" : "Add ip geolocation info", "processors" : [ { - "geoip" : { + "ip_location" : { "field" : "ip" } } ] } -PUT my-index-000001/_doc/my_id?pipeline=geoip +PUT my-index-000001/_doc/my_id?pipeline=ip_location { "ip": "80.231.5.0" } @@ -241,7 +241,7 @@ PUT my_ip_locations { "mappings": { "properties": { - "geoip": { + "ip_location": { "properties": { "location": { "type": "geo_point" } } @@ -254,19 +254,19 @@ PUT my_ip_locations //// [source,console] -------------------------------------------------- -PUT _ingest/pipeline/geoip +PUT _ingest/pipeline/ip_location { "description" : "Add ip geolocation info", "processors" : [ { - "geoip" : { + "ip_location" : { "field" : "ip" } } ] } -PUT my_ip_locations/_doc/1?refresh=true&pipeline=geoip +PUT my_ip_locations/_doc/1?refresh=true&pipeline=ip_location { "ip": "89.160.20.128" } @@ -316,7 +316,7 @@ GET /my_ip_locations/_search "_id" : "1", "_score" : 1.0, "_source" : { - "geoip" : { + "ip_location" : { "continent_name" : "Europe", "country_name" : "Sweden", "country_iso_code" : "SE", From eb2a79564efdfcd8feb7e2f3f2a0a2c74295e37a Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Mon, 11 Nov 2024 16:49:19 -0500 Subject: [PATCH 7/8] Drop the copied examples, the geoip examples suffice --- .../ingest/processors/ip-location.asciidoc | 241 ------------------ 1 file 
changed, 241 deletions(-) diff --git a/docs/reference/ingest/processors/ip-location.asciidoc b/docs/reference/ingest/processors/ip-location.asciidoc index 155092ebccdbf..3d5854fcc11d4 100644 --- a/docs/reference/ingest/processors/ip-location.asciidoc +++ b/docs/reference/ingest/processors/ip-location.asciidoc @@ -225,244 +225,3 @@ Which returns: } -------------------------------------------------- // TESTRESPONSE[s/"_seq_no" : \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] - -[[ingest-geoip-mappings-note]] -===== Recognizing Location as a Geopoint -Although this processor enriches your document with a `location` field containing -the estimated latitude and longitude of the IP address, this field will not be -indexed as a {ref}/geo-point.html[`geo_point`] type in Elasticsearch without explicitly defining it -as such in the mapping. - -You can use the following mapping for the example index above: - -[source,console] --------------------------------------------------- -PUT my_ip_locations -{ - "mappings": { - "properties": { - "ip_location": { - "properties": { - "location": { "type": "geo_point" } - } - } - } - } -} --------------------------------------------------- - -//// -[source,console] --------------------------------------------------- -PUT _ingest/pipeline/ip_location -{ - "description" : "Add ip geolocation info", - "processors" : [ - { - "ip_location" : { - "field" : "ip" - } - } - ] -} - -PUT my_ip_locations/_doc/1?refresh=true&pipeline=ip_location -{ - "ip": "89.160.20.128" -} - -GET /my_ip_locations/_search -{ - "query": { - "bool": { - "must": { - "match_all": {} - }, - "filter": { - "geo_distance": { - "distance": "1m", - "geoip.location": { - "lon": 15.6167, - "lat": 58.4167 - } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -[source,console-result] --------------------------------------------------- -{ - "took" : 3, - "timed_out" : false, - "_shards" : { - 
"total" : 1, - "successful" : 1, - "skipped" : 0, - "failed" : 0 - }, - "hits" : { - "total" : { - "value": 1, - "relation": "eq" - }, - "max_score" : 1.0, - "hits" : [ - { - "_index" : "my_ip_locations", - "_id" : "1", - "_score" : 1.0, - "_source" : { - "ip_location" : { - "continent_name" : "Europe", - "country_name" : "Sweden", - "country_iso_code" : "SE", - "city_name" : "Linköping", - "region_iso_code" : "SE-E", - "region_name" : "Östergötland County", - "location" : { - "lon" : 15.6167, - "lat" : 58.4167 - } - }, - "ip" : "89.160.20.128" - } - } - ] - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took" : 3/"took" : $body.took/] -//// - -[[manage-geoip-database-updates]] -==== Manage your own IP geolocation database updates - -If you can't <> your IP geolocation databases -from the Elastic endpoint, you have a few other options: - -* <> -* <> -* <> - -[[use-proxy-geoip-endpoint]] -**Use a proxy endpoint** - -If you can't connect directly to the Elastic GeoIP endpoint, consider setting up -a secure proxy. You can then specify the proxy endpoint URL in the -<> setting -of each node’s `elasticsearch.yml` file. - -In a strict setup the following domains may need to be added to the allowed -domains list: - -* `geoip.elastic.co` -* `storage.googleapis.com` - -[[use-custom-geoip-endpoint]] -**Use a custom endpoint** - -You can create a service that mimics the Elastic GeoIP endpoint. You can then -get automatic updates from this service. - -. Download your `.mmdb` database files from the -http://dev.maxmind.com/geoip/geoip2/geolite2[MaxMind site]. - -. Copy your database files to a single directory. - -. From your {es} directory, run: -+ -[source,sh] ----- -./bin/elasticsearch-geoip -s my/source/dir [-t target/directory] ----- - -. Serve the static database files from your directory. 
For example, you can use -Docker to serve the files from an nginx server: -+ -[source,sh] ----- -docker run -v my/source/dir:/usr/share/nginx/html:ro nginx ----- - -. Specify the service's endpoint URL in the -<> setting -of each node’s `elasticsearch.yml` file. -+ -By default, {es} checks the endpoint for updates every three days. To use -another polling interval, use the <> to set -<>. - -[[manually-update-geoip-databases]] -**Manually update your IP geolocation databases** - -. Use the <> to set -`ingest.geoip.downloader.enabled` to `false`. This disables automatic updates -that may overwrite your database changes. This also deletes all downloaded -databases. - -. Download your `.mmdb` database files from the -http://dev.maxmind.com/geoip/geoip2/geolite2[MaxMind site]. -+ -You can also use custom city, country, and ASN `.mmdb` files. These files must -be uncompressed. The type (city, country, or ASN) will be pulled from the file -metadata, so the filename does not matter. - -. On {ess} deployments upload database using -a {cloud}/ec-custom-bundles.html[custom bundle]. - -. On self-managed deployments copy the database files to `$ES_CONFIG/ingest-geoip`. - -. In your `geoip` processors, configure the `database_file` parameter to use a -custom database file. - -[[ingest-geoip-settings]] -===== Node Settings - -The `geoip` processor supports the following setting: - -`ingest.geoip.cache_size`:: - - The maximum number of results that should be cached. Defaults to `1000`. - -Note that these settings are node settings and apply to all `geoip` and `ip_location` processors, i.e. there is a single cache for all such processors. - -[[geoip-cluster-settings]] -===== Cluster settings - -[[ingest-geoip-downloader-enabled]] -`ingest.geoip.downloader.enabled`:: -(<>, Boolean) -If `true`, {es} automatically downloads and manages updates for IP geolocation databases -from the `ingest.geoip.downloader.endpoint`. 
If `false`, {es} does not download -updates and deletes all downloaded databases. Defaults to `true`. - -[[ingest-geoip-downloader-eager-download]] -`ingest.geoip.downloader.eager.download`:: -(<>, Boolean) -If `true`, {es} downloads IP geolocation databases immediately, regardless of whether a -pipeline exists with a geoip processor. If `false`, {es} only begins downloading -the databases if a pipeline with a geoip processor exists or is added. Defaults -to `false`. - -[[ingest-geoip-downloader-endpoint]] -`ingest.geoip.downloader.endpoint`:: -(<>, string) -Endpoint URL used to download updates for IP geolocation databases. For example, `https://myDomain.com/overview.json`. -Defaults to `https://geoip.elastic.co/v1/database`. {es} stores downloaded database files in -each node's <> at `$ES_TMPDIR/geoip-databases/`. -Note that {es} will make a GET request to `${ingest.geoip.downloader.endpoint}?elastic_geoip_service_tos=agree`, -expecting the list of metadata about databases typically found in `overview.json`. - -The downloader uses the JDK's builtin cacerts. If you're using a custom endpoint, add the custom https endpoint cacert(s) to the JDK's truststore. - -[[ingest-geoip-downloader-poll-interval]] -`ingest.geoip.downloader.poll.interval`:: -(<>, <>) -How often {es} checks for IP geolocation database updates at the -`ingest.geoip.downloader.endpoint`. Must be greater than `1d` (one day). Defaults -to `3d` (three days). 
From 48b171e47ce30e679b40ff46a0646f213116b5ae Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Mon, 11 Nov 2024 16:53:07 -0500 Subject: [PATCH 8/8] Drop technical preview from these docs --- docs/reference/ingest/processors/geoip.asciidoc | 2 -- docs/reference/ingest/processors/ip-location.asciidoc | 2 -- 2 files changed, 4 deletions(-) diff --git a/docs/reference/ingest/processors/geoip.asciidoc b/docs/reference/ingest/processors/geoip.asciidoc index 455b254576809..78ebe3f5b5ee3 100644 --- a/docs/reference/ingest/processors/geoip.asciidoc +++ b/docs/reference/ingest/processors/geoip.asciidoc @@ -79,8 +79,6 @@ depend on what has been found and which properties were configured in `propertie `residential_proxy`, `domain`, `isp`, `isp_organization_name`, `mobile_country_code`, `mobile_network_code`, `user_type`, and `connection_type`. The fields actually added depend on what has been found and which properties were configured in `properties`. -preview::["Do not use the GeoIP2 Anonymous IP, GeoIP2 Connection Type, GeoIP2 Domain, GeoIP2 ISP, and GeoIP2 Enterprise databases in production environments. This functionality is in technical preview and may be changed or removed in a future release. 
Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] - Here is an example that uses the default city database and adds the geographical information to the `geoip` field based on the `ip` field: [source,console] diff --git a/docs/reference/ingest/processors/ip-location.asciidoc b/docs/reference/ingest/processors/ip-location.asciidoc index 3d5854fcc11d4..e2ca9dbbe2eb3 100644 --- a/docs/reference/ingest/processors/ip-location.asciidoc +++ b/docs/reference/ingest/processors/ip-location.asciidoc @@ -79,8 +79,6 @@ depend on what has been found and which properties were configured in `propertie `residential_proxy`, `domain`, `isp`, `isp_organization_name`, `mobile_country_code`, `mobile_network_code`, `user_type`, and `connection_type`. The fields actually added depend on what has been found and which properties were configured in `properties`. -preview::["Do not use the GeoIP2 Anonymous IP, GeoIP2 Connection Type, GeoIP2 Domain, GeoIP2 ISP, and GeoIP2 Enterprise databases in production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] - Here is an example that uses the default city database and adds the geographical information to the `ip_location` field based on the `ip` field: [source,console]