|
8 | 8 | */ |
9 | 9 | package org.elasticsearch.ingest.common; |
10 | 10 |
|
| 11 | +import org.elasticsearch.common.network.NetworkAddress; |
11 | 12 | import org.elasticsearch.common.time.DateFormatters; |
12 | 13 | import org.elasticsearch.common.util.set.Sets; |
13 | 14 |
|
| 15 | +import java.net.InetAddress; |
| 16 | +import java.net.UnknownHostException; |
14 | 17 | import java.time.Instant; |
15 | 18 | import java.time.LocalDate; |
16 | 19 | import java.time.ZoneId; |
@@ -62,6 +65,20 @@ final class CefParser { |
// Matches one extension entry of the form `key=value` that is followed either by whitespace or by the end
// of the input. The key and the value are each wrapped in a capturing group here; their exact syntax comes
// from EXTENSION_KEY_PATTERN and EXTENSION_VALUE_PATTERN.
// NOTE(review): group numbering assumes those two sub-patterns add no capturing groups of their own - confirm.
62 | 65 | private static final Pattern EXTENSION_NEXT_KEY_VALUE_PATTERN = Pattern.compile( |
63 | 66 | "(" + EXTENSION_KEY_PATTERN + ")=(" + EXTENSION_VALUE_PATTERN + ")(?:\\s+|$)" |
64 | 67 | ); |
| 68 | + |
| 69 | + // Comprehensive regex pattern to match various MAC address formats |
| 70 | + public static final String MAC_ADDRESS_REGEX = "^(" + |
| 71 | + // Combined colon and hyphen separated 6-group patterns |
| 72 | + "(([0-9A-Fa-f]{2}[:|-]){5}[0-9A-Fa-f]{2})|" + |
| 73 | + // Dot-separated 6-group pattern |
| 74 | + "([0-9A-Fa-f]{4}\\.){2}[0-9A-Fa-f]{4}|" + |
| 75 | + // Combined colon and hyphen separated 8-group patterns |
| 76 | + "([0-9A-Fa-f]{2}[:|-]){7}[0-9A-Fa-f]{2}|" + |
| 77 | + // Dot-separated EUI-64 |
| 78 | + "([0-9A-Fa-f]{4}\\.){3}[0-9A-Fa-f]{4}" + ")$"; |
| 79 | + private static final int EUI48_HEX_LENGTH = 48 / 4; |
| 80 | + private static final int EUI64_HEX_LENGTH = 64 / 4; |
| 81 | + private static final int EUI64_HEX_WITH_SEPARATOR_MAX_LENGTH = EUI64_HEX_LENGTH + EUI64_HEX_LENGTH / 2 - 1; |
65 | 82 | private static final Map<String, String> EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = Map.ofEntries( |
66 | 83 | entry("\\\\", "\\"), |
67 | 84 | entry("\\=", "="), |
@@ -146,77 +163,77 @@ final class CefParser { |
146 | 163 | Sets.union(FIELD_MAPPINGS.keySet(), Set.copyOf(FIELD_MAPPINGS.values())) |
147 | 164 | ); |
148 | 165 |
|
149 | | - private static final Map<String, Class<?>> FIELDS = Map.<String, Class<?>>ofEntries( |
150 | | - entry("@timestamp", ZonedDateTime.class), |
151 | | - entry("destination.bytes", Long.class), |
152 | | - entry("destination.domain", String.class), |
153 | | - entry("destination.geo.location.lat", Double.class), |
154 | | - entry("destination.geo.location.lon", Double.class), |
155 | | - entry("destination.ip", String.class), |
156 | | - entry("destination.mac", String.class), |
157 | | - entry("destination.port", Long.class), |
158 | | - entry("destination.process.name", String.class), |
159 | | - entry("destination.process.pid", Long.class), |
160 | | - entry("destination.registered_domain", String.class), |
161 | | - entry("destination.user.group.name", String.class), |
162 | | - entry("destination.user.id", String.class), |
163 | | - entry("destination.user.name", String.class), |
164 | | - entry("device.event_class_id", String.class), |
165 | | - entry("device.product", String.class), |
166 | | - entry("device.vendor", String.class), |
167 | | - entry("device.version", String.class), |
168 | | - entry("event.action", String.class), |
169 | | - entry("event.code", String.class), |
170 | | - entry("event.end", ZonedDateTime.class), |
171 | | - entry("event.id", String.class), |
172 | | - entry("event.ingested", ZonedDateTime.class), |
173 | | - entry("event.outcome", String.class), |
174 | | - entry("event.reason", String.class), |
175 | | - entry("event.start", ZonedDateTime.class), |
176 | | - entry("event.timezone", String.class), |
177 | | - entry("file.created", ZonedDateTime.class), |
178 | | - entry("file.extension", String.class), |
179 | | - entry("file.group", String.class), |
180 | | - entry("file.hash", String.class), |
181 | | - entry("file.inode", String.class), |
182 | | - entry("file.mtime", ZonedDateTime.class), |
183 | | - entry("file.name", String.class), |
184 | | - entry("file.path", String.class), |
185 | | - entry("file.size", Long.class), |
186 | | - entry("host.nat.ip", String.class), |
187 | | - entry("http.request.method", String.class), |
188 | | - entry("http.request.referrer", String.class), |
189 | | - entry("log.syslog.facility.code", Long.class), |
190 | | - entry("message", String.class), |
191 | | - entry("network.direction", String.class), |
192 | | - entry("network.protocol", String.class), |
193 | | - entry("network.transport", String.class), |
194 | | - entry("observer.egress.interface.name", String.class), |
195 | | - entry("observer.hostname", String.class), |
196 | | - entry("observer.ingress.interface.name", String.class), |
197 | | - entry("observer.ip", String.class), |
198 | | - entry("observer.mac", String.class), |
199 | | - entry("observer.name", String.class), |
200 | | - entry("observer.registered_domain", String.class), |
201 | | - entry("observer.version", String.class), |
202 | | - entry("observer.vendor", String.class), |
203 | | - entry("observer.product", String.class), |
204 | | - entry("process.name", String.class), |
205 | | - entry("process.pid", Long.class), |
206 | | - entry("source.bytes", Long.class), |
207 | | - entry("source.domain", String.class), |
208 | | - entry("source.geo.location.lat", Double.class), |
209 | | - entry("source.geo.location.lon", Double.class), |
210 | | - entry("source.ip", String.class), |
211 | | - entry("source.mac", String.class), |
212 | | - entry("source.port", Long.class), |
213 | | - entry("source.process.name", String.class), |
214 | | - entry("source.process.pid", Long.class), |
215 | | - entry("source.registered_domain", String.class), |
216 | | - entry("source.service.name", String.class), |
217 | | - entry("source.user.name", String.class), |
218 | | - entry("url.original", String.class), |
219 | | - entry("user_agent.original", String.class) |
// Declared data type of every ECS field name listed below. processExtensions looks up the type of the
// mapped target field here and passes it to convertValueToType, which selects the conversion to apply
// (for example IPType -> toIP, MACAddressType -> toMACAddress, TimestampType -> toTimestamp).
// A field name that is absent from this map yields a null type, which convertValueToType rejects with
// "Unsupported type: null".
| 166 | + private static final Map<String, DataType> FIELDS = Map.<String, DataType>ofEntries( |
| 167 | + entry("@timestamp", DataType.TimestampType), |
| 168 | + entry("destination.bytes", DataType.LongType), |
| 169 | + entry("destination.domain", DataType.StringType), |
| 170 | + entry("destination.geo.location.lat", DataType.DoubleType), |
| 171 | + entry("destination.geo.location.lon", DataType.DoubleType), |
| 172 | + entry("destination.ip", DataType.IPType), |
| 173 | + entry("destination.mac", DataType.MACAddressType), |
| 174 | + entry("destination.port", DataType.LongType), |
| 175 | + entry("destination.process.name", DataType.StringType), |
| 176 | + entry("destination.process.pid", DataType.LongType), |
| 177 | + entry("destination.registered_domain", DataType.StringType), |
| 178 | + entry("destination.user.group.name", DataType.StringType), |
| 179 | + entry("destination.user.id", DataType.StringType), |
| 180 | + entry("destination.user.name", DataType.StringType), |
| 181 | + entry("device.event_class_id", DataType.StringType), |
| 182 | + entry("device.product", DataType.StringType), |
| 183 | + entry("device.vendor", DataType.StringType), |
| 184 | + entry("device.version", DataType.StringType), |
| 185 | + entry("event.action", DataType.StringType), |
| 186 | + entry("event.code", DataType.StringType), |
| 187 | + entry("event.end", DataType.TimestampType), |
| 188 | + entry("event.id", DataType.StringType), |
| 189 | + entry("event.ingested", DataType.TimestampType), |
| 190 | + entry("event.outcome", DataType.StringType), |
| 191 | + entry("event.reason", DataType.StringType), |
| 192 | + entry("event.start", DataType.TimestampType), |
| 193 | + entry("event.timezone", DataType.StringType), |
| 194 | + entry("file.created", DataType.TimestampType), |
| 195 | + entry("file.extension", DataType.StringType), |
| 196 | + entry("file.group", DataType.StringType), |
| 197 | + entry("file.hash", DataType.StringType), |
| 198 | + entry("file.inode", DataType.StringType), |
| 199 | + entry("file.mtime", DataType.TimestampType), |
| 200 | + entry("file.name", DataType.StringType), |
| 201 | + entry("file.path", DataType.StringType), |
| 202 | + entry("file.size", DataType.LongType), |
| 203 | + entry("host.nat.ip", DataType.IPType), |
| 204 | + entry("http.request.method", DataType.StringType), |
| 205 | + entry("http.request.referrer", DataType.StringType), |
| 206 | + entry("log.syslog.facility.code", DataType.LongType), |
| 207 | + entry("message", DataType.StringType), |
| 208 | + entry("network.direction", DataType.StringType), |
| 209 | + entry("network.protocol", DataType.StringType), |
| 210 | + entry("network.transport", DataType.StringType), |
| 211 | + entry("observer.egress.interface.name", DataType.StringType), |
| 212 | + entry("observer.hostname", DataType.StringType), |
| 213 | + entry("observer.ingress.interface.name", DataType.StringType), |
| 214 | + entry("observer.ip", DataType.IPType), |
| 215 | + entry("observer.mac", DataType.MACAddressType), |
| 216 | + entry("observer.name", DataType.StringType), |
| 217 | + entry("observer.registered_domain", DataType.StringType), |
| 218 | + entry("observer.version", DataType.StringType), |
| 219 | + entry("observer.vendor", DataType.StringType), |
| 220 | + entry("observer.product", DataType.StringType), |
| 221 | + entry("process.name", DataType.StringType), |
| 222 | + entry("process.pid", DataType.LongType), |
| 223 | + entry("source.bytes", DataType.LongType), |
| 224 | + entry("source.domain", DataType.StringType), |
| 225 | + entry("source.geo.location.lat", DataType.DoubleType), |
| 226 | + entry("source.geo.location.lon", DataType.DoubleType), |
| 227 | + entry("source.ip", DataType.IPType), |
| 228 | + entry("source.mac", DataType.MACAddressType), |
| 229 | + entry("source.port", DataType.LongType), |
| 230 | + entry("source.process.name", DataType.StringType), |
| 231 | + entry("source.process.pid", DataType.LongType), |
| 232 | + entry("source.registered_domain", DataType.StringType), |
| 233 | + entry("source.service.name", DataType.StringType), |
| 234 | + entry("source.user.name", DataType.StringType), |
| 235 | + entry("url.original", DataType.StringType), |
| 236 | + entry("user_agent.original", DataType.StringType) |
220 | 237 | ); |
221 | 238 |
|
// Immutable single-element set holding the text "incomplete CEF header".
// NOTE(review): the consumer of this constant is not visible in this excerpt; presumably it is reported
// when header parsing stops early - confirm, and confirm why a Set rather than a plain String is needed.
222 | 239 | private static final Set<String> ERROR_MESSAGE_INCOMPLETE_CEF_HEADER = Set.of("incomplete CEF header"); |
@@ -256,6 +273,18 @@ final class CefParser { |
256 | 273 | MONTH_OF_YEAR |
257 | 274 | ); |
258 | 275 |
|
// Target data types a parsed extension value can be converted to. FIELDS assigns one of these to each
// ECS field name and convertValueToType dispatches on it.
// convertValueToType currently has branches for StringType, LongType, DoubleType, IntegerType,
// TimestampType, MACAddressType and IPType; FloatType and BooleanType have no branch there and are
// rejected with "Unsupported type".
| 276 | + private enum DataType { |
| 277 | + IntegerType, |
| 278 | + LongType, |
| 279 | + FloatType, |
| 280 | + DoubleType, |
| 281 | + StringType, |
| 282 | + BooleanType, |
| 283 | + IPType, |
| 284 | + MACAddressType, |
| 285 | + TimestampType; |
| 286 | + } |
| 287 | + |
259 | 288 | CEFEvent process(String cefString) { |
260 | 289 | List<String> headers = new ArrayList<>(); |
261 | 290 | Matcher matcher = HEADER_NEXT_FIELD_PATTERN.matcher(cefString); |
@@ -323,7 +352,7 @@ private void processExtensions(String cefString, int extensionStart, CEFEvent ev |
323 | 352 | .stream() |
324 | 353 | .filter(entry -> FIELD_MAPPINGS.containsKey(entry.getKey())) |
325 | 354 | .collect(Collectors.toMap(entry -> FIELD_MAPPINGS.get(entry.getKey()), entry -> { |
326 | | - Class<?> fieldType = FIELDS.get(FIELD_MAPPINGS.get(entry.getKey())); |
| 355 | + DataType fieldType = FIELDS.get(FIELD_MAPPINGS.get(entry.getKey())); |
327 | 356 | return convertValueToType(entry.getValue(), fieldType); |
328 | 357 | })); |
329 | 358 | // Add ECS translations to the root of the document |
@@ -361,17 +390,21 @@ private static Map<String, String> parseExtensions(String extensionString) { |
361 | 390 | return extensions; |
362 | 391 | } |
363 | 392 |
|
364 | | - private Object convertValueToType(String value, Class<?> type) { |
365 | | - if (type == String.class) { |
| 393 | + private Object convertValueToType(String value, DataType type) { |
| 394 | + if (type == DataType.StringType) { |
366 | 395 | return value; |
367 | | - } else if (type == Long.class) { |
| 396 | + } else if (type == DataType.LongType) { |
368 | 397 | return Long.parseLong(value); |
369 | | - } else if (type == Double.class) { |
| 398 | + } else if (type == DataType.DoubleType) { |
370 | 399 | return Double.parseDouble(value); |
371 | | - } else if (type == Integer.class) { |
| 400 | + } else if (type == DataType.IntegerType) { |
372 | 401 | return Integer.parseInt(value); |
373 | | - } else if (type == ZonedDateTime.class) { |
| 402 | + } else if (type == DataType.TimestampType) { |
374 | 403 | return toTimestamp(value); |
| 404 | + } else if (type == DataType.MACAddressType) { |
| 405 | + return toMACAddress(value); |
| 406 | + } else if (type == DataType.IPType) { |
| 407 | + return toIP(value); |
375 | 408 | } else { |
376 | 409 | throw new IllegalArgumentException("Unsupported type: " + type); |
377 | 410 | } |
@@ -416,6 +449,50 @@ ZonedDateTime toTimestamp(String value) { |
416 | 449 | throw new IllegalArgumentException("Value is not a valid timestamp: " + value); |
417 | 450 | } |
418 | 451 |
|
| 452 | + String toMACAddress(String v) throws IllegalArgumentException { |
| 453 | + // Insert separators if necessary |
| 454 | + String macWithSeparators = insertMACSeparators(v); |
| 455 | + |
| 456 | + // Validate MAC address format |
| 457 | + // Compiled pattern for efficient matching |
| 458 | + Pattern macAddressPattern = Pattern.compile(MAC_ADDRESS_REGEX); |
| 459 | + Matcher matcher = macAddressPattern.matcher(macWithSeparators); |
| 460 | + if (matcher.matches() == false) { |
| 461 | + throw new IllegalArgumentException("Invalid MAC address format"); |
| 462 | + } |
| 463 | + // Convert to lowercase and return |
| 464 | + return macWithSeparators; |
| 465 | + } |
| 466 | + |
| 467 | + String toIP(String v) { |
| 468 | + InetAddress address; |
| 469 | + try { |
| 470 | + address = InetAddress.getByName(v); |
| 471 | + } catch (UnknownHostException e) { |
| 472 | + throw new IllegalArgumentException("Invalid IP address format"); |
| 473 | + } |
| 474 | + return NetworkAddress.format(address); |
| 475 | + } |
| 476 | + |
| 477 | + static String insertMACSeparators(String v) { |
| 478 | + // Check that the length is correct for a MAC address without separators. |
| 479 | + // And check that there isn't already a separator in the string. |
| 480 | + if ((v.length() != EUI48_HEX_LENGTH && v.length() != EUI64_HEX_LENGTH) |
| 481 | + || v.charAt(2) == ':' |
| 482 | + || v.charAt(2) == '-' |
| 483 | + || v.charAt(4) == '.') { |
| 484 | + return v; |
| 485 | + } |
| 486 | + StringBuilder sb = new StringBuilder(EUI64_HEX_WITH_SEPARATOR_MAX_LENGTH); |
| 487 | + for (int i = 0; i < v.length(); i++) { |
| 488 | + sb.append(v.charAt(i)); |
| 489 | + if (i < v.length() - 1 && i % 2 != 0) { |
| 490 | + sb.append(':'); |
| 491 | + } |
| 492 | + } |
| 493 | + return sb.toString(); |
| 494 | + } |
| 495 | + |
419 | 496 | private static void removeEmptyValue(Map<String, String> map) { |
420 | 497 | map.entrySet().removeIf(entry -> Objects.isNull(entry.getValue()) || entry.getValue().isEmpty()); |
421 | 498 | } |
|
0 commit comments