|
4 | 4 | including registration, deployment, versioning, and manifest operations. |
5 | 5 | """ |
6 | 6 |
|
7 | | -from typing import TYPE_CHECKING, Optional |
| 7 | +from typing import TYPE_CHECKING, Dict, Optional |
| 8 | + |
| 9 | +from amp.utils.manifest_inspector import describe_manifest, print_schema |
8 | 10 |
|
9 | 11 | from . import models |
10 | 12 |
|
@@ -198,6 +200,77 @@ def get_manifest(self, namespace: str, name: str, revision: str) -> dict: |
198 | 200 | response = self._admin._request('GET', path) |
199 | 201 | return response.json() |
200 | 202 |
|
def describe(
    self, namespace: str, name: str, revision: str = 'latest'
) -> 'Dict[str, list[Dict[str, str | bool]]]':
    """Get a structured summary of tables and columns in a dataset.

    Fetches the manifest with ``self.get_manifest`` and hands it to
    ``describe_manifest``, whose result is returned unchanged.

    The return annotation is written as a string on purpose: this module
    does not enable postponed evaluation of annotations, so an unquoted
    ``str | bool`` / ``list[...]`` would be evaluated when the class body
    runs and raise ``TypeError`` on interpreters older than Python 3.10.

    Args:
        namespace: Dataset namespace
        name: Dataset name
        revision: Version tag (default: 'latest')

    Returns:
        dict: Mapping of table names to column information. Each column is a dict with:
            - name: Column name (str)
            - type: Arrow type (str, simplified representation)
            - nullable: Whether the column allows NULL values (bool)

    Raises:
        Whatever ``get_manifest`` or ``describe_manifest`` raise; nothing is
        caught here.

    Example:
        >>> client = AdminClient('http://localhost:8080')
        >>> schema = client.datasets.describe('_', 'eth_firehose', 'latest')
        >>> for table_name, columns in schema.items():
        ...     print(f"\\nTable: {table_name}")
        ...     for col in columns:
        ...         nullable = "NULL" if col['nullable'] else "NOT NULL"
        ...         print(f"  {col['name']}: {col['type']} {nullable}")
    """
    manifest = self.get_manifest(namespace, name, revision)
    return describe_manifest(manifest)
| 231 | + |
def inspect(self, namespace: str, name: str, revision: str = 'latest') -> None:
    """Pretty-print the structure of a dataset for easy inspection.

    Displays tables and their columns in a human-readable format.
    This is perfect for exploring datasets interactively.

    The header line is ``Dataset: <namespace>/<name>@<revision>``. When
    ``self.get_version`` succeeds and its result has a ``kind`` attribute,
    a second ``Kind: ...`` line is appended. That lookup is best-effort:
    any exception from it is logged at DEBUG level and otherwise ignored.
    The schema itself comes from ``self.describe`` and is rendered by
    ``print_schema``; errors from those calls are not caught.

    Args:
        namespace: Dataset namespace
        name: Dataset name
        revision: Version tag (default: 'latest')

    Returns:
        None. The output is produced by ``print_schema``.

    Example:
        >>> client = AdminClient('http://localhost:8080')
        >>> client.datasets.inspect('_', 'eth_firehose')
        Dataset: _/eth_firehose@latest

        blocks (21 columns)
          block_num    UInt64                 NOT NULL
          timestamp    Timestamp              NOT NULL
          hash         FixedSizeBinary(32)    NOT NULL
          ...

        transactions (24 columns)
          tx_hash      FixedSizeBinary(32)    NOT NULL
          from         FixedSizeBinary(20)    NOT NULL
          to           FixedSizeBinary(20)    NULL
          ...
    """
    # Imported locally so this method does not depend on a module-level
    # logger being defined elsewhere in the file.
    import logging

    header = f'Dataset: {namespace}/{name}@{revision}'

    # Version info only enriches the header, so a failure here must never
    # prevent the schema from being printed.
    try:
        version_info = self.get_version(namespace, name, revision)
        if hasattr(version_info, 'kind'):
            header += f'\nKind: {version_info.kind}'
    except Exception as exc:
        # Deliberately broad: keep going, but leave a trace for debugging
        # instead of discarding the error silently.
        logging.getLogger(__name__).debug(
            'Could not fetch version info for %s/%s@%s: %s',
            namespace,
            name,
            revision,
            exc,
        )

    schema = self.describe(namespace, name, revision)
    print_schema(schema, header=header)
| 273 | + |
201 | 274 | def delete(self, namespace: str, name: str) -> None: |
202 | 275 | """Delete all versions and metadata for a dataset. |
203 | 276 |
|
|
0 commit comments