Skip to content

Commit cf5bdc2

Browse files
committed
feat(admin): Add inspect() and describe() methods to datasets client
- describe(namespace, name, revision): Returns structured schema dictionary
- inspect(namespace, name, revision): Pretty-prints dataset structure
1 parent b1c3616 commit cf5bdc2

File tree

1 file changed

+74
-1
lines changed

1 file changed

+74
-1
lines changed

src/amp/admin/datasets.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
including registration, deployment, versioning, and manifest operations.
55
"""
66

7-
from typing import TYPE_CHECKING, Optional
7+
from typing import TYPE_CHECKING, Dict, Optional
8+
9+
from amp.utils.manifest_inspector import describe_manifest, print_schema
810

911
from . import models
1012

@@ -198,6 +200,77 @@ def get_manifest(self, namespace: str, name: str, revision: str) -> dict:
198200
response = self._admin._request('GET', path)
199201
return response.json()
200202

203+
def describe(self, namespace: str, name: str, revision: str = 'latest') -> Dict[str, list[Dict[str, str | bool]]]:
    """Return the tables and columns of a dataset as plain Python data.

    The manifest for the requested revision is fetched with
    ``get_manifest`` and handed to ``describe_manifest``, whose result is
    returned unchanged.

    Args:
        namespace: Dataset namespace
        name: Dataset name
        revision: Version tag (default: 'latest')

    Returns:
        dict: Table name -> list of column descriptions. Each column
        description is a dict with the keys:
            - name: Column name (str)
            - type: Arrow type (str, simplified representation)
            - nullable: Whether the column allows NULL values (bool)

    Example:
        >>> client = AdminClient('http://localhost:8080')
        >>> schema = client.datasets.describe('_', 'eth_firehose', 'latest')
        >>> for table_name, columns in schema.items():
        ...     print(f"\\nTable: {table_name}")
        ...     for col in columns:
        ...         nullable = "NULL" if col['nullable'] else "NOT NULL"
        ...         print(f"  {col['name']}: {col['type']} {nullable}")
    """
    return describe_manifest(self.get_manifest(namespace, name, revision))
231+
232+
def inspect(self, namespace: str, name: str, revision: str = 'latest') -> None:
    """Pretty-print the structure of a dataset for interactive exploration.

    Builds a header of the form ``Dataset: <namespace>/<name>@<revision>``,
    optionally extends it with the dataset kind, then prints the schema
    returned by ``describe`` via ``print_schema``.

    The kind lookup is best-effort: if ``get_version`` fails for any
    reason, the failure is logged at DEBUG level (with traceback) and the
    header is printed without the ``Kind:`` line. Errors raised while
    fetching or printing the schema itself are not suppressed.

    Args:
        namespace: Dataset namespace
        name: Dataset name
        revision: Version tag (default: 'latest')

    Example:
        >>> client = AdminClient('http://localhost:8080')
        >>> client.datasets.inspect('_', 'eth_firehose')
        Dataset: _/eth_firehose@latest

        blocks (21 columns)
          block_num    UInt64               NOT NULL
          timestamp    Timestamp            NOT NULL
          hash         FixedSizeBinary(32)  NOT NULL
          ...

        transactions (24 columns)
          tx_hash      FixedSizeBinary(32)  NOT NULL
          from         FixedSizeBinary(20)  NOT NULL
          to           FixedSizeBinary(20)  NULL
          ...
    """
    # Local import keeps this block self-contained; the module's import
    # section is not modified.
    import logging

    header = f'Dataset: {namespace}/{name}@{revision}'

    # Version info only adds context to the header, so a failure here must
    # never prevent the schema from being shown. Record it for debugging
    # instead of discarding it silently.
    try:
        version_info = self.get_version(namespace, name, revision)
        kind = getattr(version_info, 'kind', None)
    except Exception:
        logging.getLogger(__name__).debug(
            'Could not fetch version info for %s/%s@%s; omitting kind from header',
            namespace,
            name,
            revision,
            exc_info=True,
        )
    else:
        # Skip a missing or None kind rather than printing "Kind: None".
        if kind is not None:
            header += f'\nKind: {kind}'

    schema = self.describe(namespace, name, revision)
    print_schema(schema, header=header)
273+
201274
def delete(self, namespace: str, name: str) -> None:
202275
"""Delete all versions and metadata for a dataset.
203276

0 commit comments

Comments
 (0)