|
| 1 | +"""Unit tests for registry dataset inspection methods.""" |
| 2 | + |
| 3 | +import pytest |
| 4 | + |
| 5 | +from amp.registry.datasets import RegistryDatasetsClient |
| 6 | +from amp.utils.manifest_inspector import format_arrow_type |
| 7 | + |
| 8 | + |
class MockRegistryClient:
    """Mock registry client for testing.

    Every call to ``_request`` is answered with the manifest that was passed
    to the constructor, so each test can supply its own payload.
    """

    def __init__(self, manifest):
        # Payload returned by ``json()`` on every mocked response.
        self.manifest = manifest

    def _request(self, method, path, params=None):
        """Mock HTTP request.

        ``method``, ``path`` and ``params`` are accepted and ignored; no
        network access takes place.

        Returns:
            An object whose ``json()`` method returns this client's manifest.
        """
        # Capture the instance's manifest in a local so the nested class
        # closes over it. A bare ``manifest`` inside ``json`` would resolve to
        # a module-level global instead of the value given to ``__init__``.
        payload = self.manifest

        class MockResponse:
            def json(self):
                return payload

        return MockResponse()
| 23 | + |
| 24 | + |
def _field(name, arrow_type, nullable):
    """Build one field entry with ``name``, ``type`` and ``nullable`` keys."""
    return {'name': name, 'type': arrow_type, 'nullable': nullable}


def _table(*fields):
    """Wrap field entries in the nested ``schema -> arrow -> fields`` layout."""
    return {'schema': {'arrow': {'fields': list(fields)}}}


# Sample manifest for testing: a ``blocks`` table (four fields) and a
# ``transactions`` table (three fields), mixing primitive and parameterized
# Arrow types as well as nullable and non-nullable columns.
manifest = {
    'kind': 'manifest',
    'dependencies': {},
    'tables': {
        'blocks': _table(
            _field('block_num', 'UInt64', False),
            _field('timestamp', {'Timestamp': ['Nanosecond', '+00:00']}, False),
            _field('hash', {'FixedSizeBinary': 32}, False),
            _field('base_fee_per_gas', {'Decimal128': [38, 0]}, True),
        ),
        'transactions': _table(
            _field('tx_hash', {'FixedSizeBinary': 32}, False),
            _field('from', {'FixedSizeBinary': 20}, False),
            _field('value', {'Decimal128': [38, 0]}, True),
        ),
    },
}
| 55 | + |
| 56 | + |
@pytest.mark.unit
class TestDatasetInspection:
    """Test dataset inspection methods.

    Covers ``format_arrow_type`` for primitive and parameterized Arrow types,
    and ``RegistryDatasetsClient.describe`` against in-memory manifests.
    """

    @staticmethod
    def _client_for(manifest_data):
        """Return a datasets client whose ``get_manifest`` yields ``manifest_data``."""
        client = RegistryDatasetsClient(MockRegistryClient(manifest_data))
        # Replace get_manifest on the instance so describe() reads the
        # in-memory manifest directly.
        client.get_manifest = lambda ns, name, ver: manifest_data
        return client

    def test_format_arrow_type_primitive(self):
        """Primitive type names are returned unchanged."""
        for primitive in ('UInt64', 'Binary', 'Boolean'):
            assert format_arrow_type(primitive) == primitive

    def test_format_arrow_type_timestamp(self):
        """Timestamp types are rendered with their time unit."""
        cases = (
            ({'Timestamp': ['Nanosecond', '+00:00']}, 'Timestamp(Nanosecond)'),
            ({'Timestamp': ['Microsecond', '+00:00']}, 'Timestamp(Microsecond)'),
        )
        for arrow_type, expected in cases:
            assert format_arrow_type(arrow_type) == expected

    def test_format_arrow_type_fixed_binary(self):
        """FixedSizeBinary types are rendered with their byte width."""
        cases = (
            ({'FixedSizeBinary': 32}, 'FixedSizeBinary(32)'),
            ({'FixedSizeBinary': 20}, 'FixedSizeBinary(20)'),
        )
        for arrow_type, expected in cases:
            assert format_arrow_type(arrow_type) == expected

    def test_format_arrow_type_decimal(self):
        """Decimal128 types are rendered with both of their parameters."""
        cases = (
            ({'Decimal128': [38, 0]}, 'Decimal128(38,0)'),
            ({'Decimal128': [18, 6]}, 'Decimal128(18,6)'),
        )
        for arrow_type, expected in cases:
            assert format_arrow_type(arrow_type) == expected

    def test_describe_returns_correct_structure(self):
        """describe() returns one list of column dicts per table."""
        schema = self._client_for(manifest).describe('test', 'dataset', 'latest')

        # Both tables from the manifest must be present.
        for table_name in ('blocks', 'transactions'):
            assert table_name in schema

        blocks = schema['blocks']
        assert len(blocks) == 4

        # First column: primitive type, not nullable.
        first = blocks[0]
        assert first['name'] == 'block_num'
        assert first['type'] == 'UInt64'
        assert first['nullable'] is False

        # Remaining columns: parameterized types rendered as strings.
        formatted_columns = (
            (1, 'timestamp', 'Timestamp(Nanosecond)'),
            (2, 'hash', 'FixedSizeBinary(32)'),
            (3, 'base_fee_per_gas', 'Decimal128(38,0)'),
        )
        for index, column_name, column_type in formatted_columns:
            assert blocks[index]['name'] == column_name
            assert blocks[index]['type'] == column_type

        assert blocks[3]['nullable'] is True

    def test_describe_handles_empty_manifest(self):
        """describe() returns an empty mapping when the manifest has no tables."""
        empty_manifest = {'kind': 'manifest', 'dependencies': {}, 'tables': {}}

        schema = self._client_for(empty_manifest).describe('test', 'dataset', 'latest')

        assert schema == {}

    def test_describe_handles_nullable_field(self):
        """describe() carries each field's nullable flag through."""
        schema = self._client_for(manifest).describe('test', 'dataset', 'latest')
        transactions = schema['transactions']

        def column(column_name):
            # First column with a matching name.
            return next(col for col in transactions if col['name'] == column_name)

        assert column('value')['nullable'] is True
        assert column('from')['nullable'] is False
| 150 | + assert from_field['nullable'] is False |
0 commit comments