|
11 | 11 | import pytest |
12 | 12 | from docutils.core import publish_string |
13 | 13 | from docutils.utils import SystemMessage |
| 14 | +from flow.record import RecordDescriptor |
14 | 15 |
|
15 | 16 | from dissect.target.exceptions import PluginError, UnsupportedPluginError |
16 | 17 | from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension |
@@ -1394,6 +1395,99 @@ def test_exported_plugin_format(descriptor: FunctionDescriptor) -> None: |
1394 | 1395 | ) |
1395 | 1396 |
|
1396 | 1397 |
|
def test_plugin_record_field_consistency() -> None:
    """Test if exported plugin functions yielding records do not have conflicting field names and types.

    For example, take the following TargetRecordDescriptors for plugin X, Y and Z::

        RecordX = TargetRecordDescriptor("record/x", [("varint", "my_field")])
        RecordY = TargetRecordDescriptor("record/y", [("path", "my_field")])
        RecordZ = TargetRecordDescriptor("record/z", [("string", "my_field")])

    The ``RecordX`` descriptor will fail in this test, since the field ``my_field`` cannot be of type ``varint``
    while also being used as ``string`` (and ``path``). The ``RecordY`` and ``RecordZ`` descriptors do not conflict,
    since the types ``path`` and ``string`` translate to the same ``string`` type in ``FIELD_TYPES_MAP``.

    Only the first type seen for a field name is remembered; every later occurrence of that field name is compared
    against it. All mismatches are collected and reported together in a single failure, in sorted order.

    Uses ``FIELD_TYPES_MAP`` which is loosely based on flow.record and ElasticSearch field types.

    Resources:
        - https://elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html
        - https://github.com/fox-it/flow.record/tree/main/flow/record/fieldtypes
        - https://github.com/JSCU-NL/dissect-elastic
    """

    # Maps a field name to the (list-stripped typename, descriptor) of the first record that declared it.
    seen_field_types: dict[str, tuple[str, RecordDescriptor]] = {}
    inconsistencies: set[str] = set()

    FIELD_TYPES_MAP = {
        # strings
        "string": "string",
        "stringlist": "string",
        "wstring": "string",
        "path": "string",
        "uri": "string",
        "command": "string",
        "dynamic": "string",
        # ints
        "varint": "int",
        "filesize": "int",
        "uint32": "int",
        "uint16": "int",
        "float": "float",
        # ip / cidr
        "net.ipaddress": "ip",
        "net.ipnetwork": "ip_range",
        "net.ipinterface": "ip_range",
        # dates
        "datetime": "datetime",
        # other
        "boolean": "boolean",
        "bytes": "binary",
        "digest": "keyword",
    }

    # NOTE(review): only the first element of the find_functions() result is iterated; presumably these are the
    # matched function descriptors - confirm against find_functions.
    for descriptor in find_functions("*", Target(), compatibility=False, show_hidden=True)[0]:
        # Only plugin functions that declare record output and expose their descriptor(s) are checked.
        if descriptor.output != "record" or not hasattr(descriptor, "record"):
            continue

        # Functions can yield a single record or a list of records.
        records = descriptor.record if isinstance(descriptor.record, list) else [descriptor.record]

        for record in records:
            assert isinstance(record, RecordDescriptor), (
                f"{record!r} of function {descriptor!r} is not of type RecordDescriptor"
            )
            # Descriptors named "empty" are exempt from the "must have fields" requirement.
            if record.name != "empty":
                assert record.fields, f"{record!r} has no fields"

            for name, field in record.fields.items():
                # Make sure field names have the same type when translated. This check does not save multiple
                # field name and typenames, this is a bare-minimum check only.

                # We only care about the field type, not if it is a list of that type.
                field_typename = field.typename.replace("[]", "")

                assert field_typename in FIELD_TYPES_MAP, (
                    f"Field type {field_typename} is not mapped in FIELD_TYPES_MAP, please add it manually."
                )

                # First occurrence of this field name: remember it as the reference and move on.
                if name not in seen_field_types:
                    seen_field_types[name] = (field_typename, record)
                    continue

                seen_typename, seen_record = seen_field_types[name]
                if FIELD_TYPES_MAP[seen_typename] != FIELD_TYPES_MAP[field_typename]:
                    inconsistencies.add(
                        f"<{record.name} ({field.typename!r}, '{name}')> is duplicate mismatch of <{seen_record.name} ({seen_typename!r}, '{name}')>"  # noqa: E501
                    )

    if inconsistencies:
        # Sort so the failure report is stable between runs (set iteration order is not).
        report = "\n".join(sorted(inconsistencies))
        pytest.fail(f"Found {len(inconsistencies)} inconsistencies in RecordDescriptors:\n" + report)
| 1489 | + |
| 1490 | + |
1397 | 1491 | def assert_valid_rst(src: str) -> None: |
1398 | 1492 | """Attempts to compile the given string to rst.""" |
1399 | 1493 |
|
|
0 commit comments