Skip to content

Commit b6230ea

Browse files
committed
feat(registry): Add inspect() and describe() methods to datasets client
- describe(namespace, name, version): Returns a structured schema dictionary mapping table names to column info (name, type, nullable).
- inspect(namespace, name, version): Pretty-prints the dataset structure in a human-readable format for interactive exploration.
- Both methods use the shared manifest inspection utilities for consistency.
1 parent 8149ea9 commit b6230ea

File tree

1 file changed

+68
-0
lines changed

1 file changed

+68
-0
lines changed

src/amp/registry/datasets.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import logging
66
from typing import TYPE_CHECKING, Any, Dict, Optional
77

8+
from amp.utils.manifest_inspector import describe_manifest, print_schema
9+
810
from . import models
911

1012
if TYPE_CHECKING:
@@ -197,6 +199,72 @@ def get_manifest(self, namespace: str, name: str, version: str) -> dict:
197199
response = self._registry._request('GET', path)
198200
return response.json()
199201

202+
def describe(self, namespace: str, name: str, version: str = 'latest') -> Dict[str, list[Dict[str, str | bool]]]:
    """Return the table/column schema of a dataset as plain data.

    The dataset manifest is fetched with ``get_manifest`` and handed to the
    shared ``describe_manifest`` helper, whose result is returned unchanged.
    Use this when the schema needs to be inspected programmatically rather
    than printed.

    Args:
        namespace: Dataset namespace
        name: Dataset name
        version: Version tag (default: 'latest')

    Returns:
        dict: Table name -> list of column descriptions. Each column
        description is a dict with the keys:
            - name: Column name (str)
            - type: Arrow type (str, simplified representation)
            - nullable: Whether the column allows NULL values (bool)

    Example:
        >>> client = RegistryClient()
        >>> schema = client.datasets.describe('edgeandnode', 'ethereum-mainnet', 'latest')
        >>> for table_name, columns in schema.items():
        ...     print(f"\\nTable: {table_name}")
        ...     for col in columns:
        ...         nullable = "NULL" if col['nullable'] else "NOT NULL"
        ...         print(f"  {col['name']}: {col['type']} {nullable}")
    """
    # One registry round-trip for the manifest; the summarising itself is
    # delegated to the shared manifest inspection utility.
    return describe_manifest(self.get_manifest(namespace, name, version))
230+
231+
def inspect(self, namespace: str, name: str, version: str = 'latest') -> None:
    """Print a human-readable overview of a dataset's tables and columns.

    Intended for interactive exploration. A header naming the dataset
    (plus its description, when the dataset record has one) is built and
    passed, together with the schema from ``describe``, to the shared
    ``print_schema`` helper, which does the actual printing.

    Args:
        namespace: Dataset namespace
        name: Dataset name
        version: Version tag (default: 'latest')

    Example:
        >>> client = RegistryClient()
        >>> client.datasets.inspect('graphops', 'ethereum-mainnet')
        Dataset: graphops/ethereum-mainnet@latest

        blocks (4 columns)
          block_num    UInt64               NOT NULL
          timestamp    Timestamp            NOT NULL
          hash         FixedSizeBinary(32)  NOT NULL
          parent_hash  FixedSizeBinary(32)  NOT NULL

        transactions (23 columns)
          block_num    UInt64               NOT NULL
          tx_hash      FixedSizeBinary(32)  NOT NULL
          ...
    """
    # The dataset record is looked up first; only its description is used here.
    description = self.get(namespace, name).description

    # The description line is optional: a falsy description adds nothing.
    description_line = f'\nDescription: {description}' if description else ''
    title = f'Dataset: {namespace}/{name}@{version}' + description_line

    # Reuse describe() so printed and programmatic views share one schema source.
    table_columns = self.describe(namespace, name, version)
    print_schema(table_columns, header=title)
267+
200268
# Write Operations (Require Authentication)
201269

202270
def publish(

0 commit comments

Comments
 (0)