Skip to content

Commit 710c4e3

Browse files
authored
Update docs to reflect current APIs and reference specific guides (#23)
* docs: Fix old API method references and client property paths - README.md: Fixed 4 API errors (.query() → .sql(), .load() params) - docs/api/admin_api.md: Updated method name and all examples - docs/admin_client_guide.md: Fixed 8+ instances of .query() → .sql() - examples/registry/search_and_deploy.py: Fixed client.admin.* → client.* - docs/registry-guide.md: Fixed client.admin.* references - Updated auth path references (~/.amp-cli-config → ~/.amp/cache/amp_cli_auth) * docs: Enhance Quick Start with auth, registry, and inspection features * docs: Use edgeandnode namespace in docs
1 parent c30a32c commit 710c4e3

File tree

8 files changed

+138
-49
lines changed

8 files changed

+138
-49
lines changed

README.md

Lines changed: 98 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,17 @@
77

88
## Overview
99

10-
Python client for Amp - a high-performance data infrastructure for blockchain data.
10+
Python client for Amp - a database for blockchain data.
1111

1212
**Features:**
1313
- **Query Client**: Issue Flight SQL queries to Amp servers
1414
- **Admin Client**: Manage datasets, deployments, and jobs programmatically
15+
- **Registry Client**: Discover, search, and publish datasets to the Registry
16+
- **Dataset Inspection**: Explore dataset schemas with `inspect()` and `describe()` methods
1517
- **Data Loaders**: Zero-copy loading into PostgreSQL, Redis, Snowflake, Delta Lake, Iceberg, and more
1618
- **Parallel Streaming**: High-throughput parallel data ingestion with automatic resume
1719
- **Manifest Generation**: Fluent API for creating and deploying datasets from SQL queries
20+
- **Auto-Refreshing Auth**: Seamless authentication with automatic token refresh
1821

1922
## Dependencies
2023
1. Rust
@@ -45,7 +48,7 @@ from amp import Client
4548
client = Client(url="grpc://localhost:8815")
4649

4750
# Execute query and convert to pandas
48-
df = client.query("SELECT * FROM eth.blocks LIMIT 10").to_pandas()
51+
df = client.sql("SELECT * FROM eth.blocks LIMIT 10").to_arrow().to_pandas()
4952
print(df)
5053
```
5154

@@ -63,7 +66,7 @@ client = Client(
6366

6467
# Register and deploy a dataset
6568
job = (
66-
client.query("SELECT block_num, hash FROM eth.blocks")
69+
client.sql("SELECT block_num, hash FROM eth.blocks")
6770
.with_dependency('eth', '_/eth_firehose@1.0.0')
6871
.register_as('_', 'my_dataset', '1.0.0', 'blocks', 'mainnet')
6972
.deploy(parallelism=4, end_block='latest', wait=True)
@@ -76,12 +79,97 @@ print(f"Deployment completed: {job.status}")
7679

7780
```python
7881
# Load query results into PostgreSQL
79-
loader = client.query("SELECT * FROM eth.blocks").load(
80-
loader_type='postgresql',
82+
result = client.sql("SELECT * FROM eth.blocks").load(
8183
connection='my_pg_connection',
82-
table_name='eth_blocks'
84+
destination='eth_blocks'
8385
)
84-
print(f"Loaded {loader.rows_written} rows")
86+
print(f"Loaded {result.rows_loaded} rows")
87+
```
88+
89+
### Authentication
90+
91+
The client supports three authentication methods (in priority order):
92+
93+
```python
94+
from amp import Client
95+
96+
# 1. Explicit token (highest priority)
97+
client = Client(
98+
url="grpc://localhost:8815",
99+
auth_token="your-token"
100+
)
101+
102+
# 2. Environment variable
103+
# export AMP_AUTH_TOKEN="your-token"
104+
client = Client(url="grpc://localhost:8815")
105+
106+
# 3. Shared auth file (auto-refresh, recommended)
107+
# Uses ~/.amp/cache/amp_cli_auth (shared with TypeScript CLI)
108+
client = Client(
109+
url="grpc://localhost:8815",
110+
auth=True # Automatically refreshes expired tokens
111+
)
112+
```
113+
114+
### Registry - Discovering Datasets
115+
116+
```python
117+
from amp import Client
118+
119+
# Connect with registry support
120+
client = Client(
121+
query_url="grpc://localhost:8815",
122+
registry_url="https://api.registry.amp.staging.thegraph.com",
123+
auth=True
124+
)
125+
126+
# Search for datasets
127+
results = client.registry.datasets.search('ethereum blocks')
128+
for dataset in results.datasets[:5]:
129+
    print(f"{dataset.namespace}/{dataset.name} - {dataset.description}")
130+
131+
# Get dataset details
132+
dataset = client.registry.datasets.get('edgeandnode', 'ethereum-mainnet')
133+
print(f"Latest version: {dataset.latest_version}")
134+
135+
# Inspect dataset schema
136+
client.registry.datasets.inspect('edgeandnode', 'ethereum-mainnet')
137+
```
138+
139+
### Dataset Inspection
140+
141+
Explore dataset schemas before querying:
142+
143+
```python
144+
from amp.registry import RegistryClient
145+
146+
client = RegistryClient()
147+
148+
# Pretty-print dataset structure (interactive)
149+
client.datasets.inspect('edgeandnode', 'ethereum-mainnet')
150+
# Output:
151+
# Dataset: edgeandnode/ethereum-mainnet@latest
152+
#
153+
# blocks (21 columns)
154+
# block_num UInt64 NOT NULL
155+
# timestamp Timestamp(Nanosecond) NOT NULL
156+
# hash FixedSizeBinary(32) NOT NULL
157+
# ...
158+
159+
# Get structured schema data (programmatic)
160+
schema = client.datasets.describe('edgeandnode', 'ethereum-mainnet')
161+
162+
# Find tables with specific columns
163+
for table_name, columns in schema.items():
164+
    col_names = [col['name'] for col in columns]
165+
    if 'block_num' in col_names:
166+
        print(f"Table '{table_name}' has block_num column")
167+
168+
# Find all address columns (20-byte binary)
169+
for table_name, columns in schema.items():
170+
    addresses = [col['name'] for col in columns if col['type'] == 'FixedSizeBinary(20)']
171+
    if addresses:
172+
        print(f"{table_name}: {', '.join(addresses)}")
85173
```
86174

87175
## Usage
@@ -108,7 +196,9 @@ uv run apps/execute_query.py
108196

109197
### Getting Started
110198
- **[Admin Client Guide](docs/admin_client_guide.md)** - Complete guide for dataset management and deployment
111-
- **[Admin API Reference](docs/api/admin_api.md)** - Full API documentation for admin operations
199+
- **[Registry Guide](docs/registry-guide.md)** - Discover and search datasets in the Registry
200+
- **[Dataset Inspection](docs/inspecting_datasets.md)** - Explore dataset schemas with `inspect()` and `describe()`
201+
- **[Client API Reference](docs/api/client_api.md)** - Full API documentation for client and admin operations
112202

113203
### Features
114204
- **[Parallel Streaming Usage Guide](docs/parallel_streaming_usage.md)** - User guide for high-throughput parallel data loading

docs/admin_client_guide.md

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ client = Client(
140140
)
141141

142142
# Query operations (Flight SQL)
143-
df = client.query("SELECT * FROM eth.blocks LIMIT 10").to_pandas()
143+
df = client.sql("SELECT * FROM eth.blocks LIMIT 10").to_pandas()
144144

145145
# Admin operations (HTTP API)
146146
datasets = client.datasets.list_all()
@@ -170,7 +170,7 @@ The legacy `url` parameter still works for Flight SQL:
170170
```python
171171
# This still works
172172
client = Client(url="grpc://localhost:8815")
173-
client.query("SELECT * FROM eth.blocks")
173+
client.sql("SELECT * FROM eth.blocks")
174174
```
175175

176176
### Environment Variables
@@ -376,7 +376,7 @@ The QueryBuilder provides a fluent API for generating manifests from SQL queries
376376

377377
```python
378378
# Build a query
379-
query = client.query("SELECT block_num, hash FROM eth.blocks")
379+
query = client.sql("SELECT block_num, hash FROM eth.blocks")
380380

381381
# Add dependencies
382382
query = query.with_dependency('eth', '_/eth_firehose@1.0.0')
@@ -409,7 +409,7 @@ The most powerful pattern combines query building, manifest generation, registra
409409
```python
410410
# Build, register, and deploy in one chain
411411
job = (
412-
client.query("SELECT block_num, hash FROM eth.blocks")
412+
client.sql("SELECT block_num, hash FROM eth.blocks")
413413
.with_dependency('eth', '_/eth_firehose@1.0.0')
414414
.register_as(
415415
namespace='_',
@@ -432,7 +432,7 @@ print(f"Deployment completed: {job.status}")
432432

433433
```python
434434
manifest = (
435-
client.query("""
435+
client.sql("""
436436
SELECT
437437
t.token_address,
438438
t.amount,
@@ -453,8 +453,7 @@ manifest = (
453453

454454
```python
455455
# 1. Develop query locally
456-
# REVIEW: IS THIS CORRECT??
457-
query = client.query("""
456+
query = client.sql("""
458457
SELECT
459458
block_num,
460459
COUNT(*) as tx_count
@@ -506,7 +505,7 @@ if job.status == 'Completed':
506505
```python
507506
# Register production version
508507
context = (
509-
client.query("SELECT * FROM processed_data")
508+
client.sql("SELECT * FROM processed_data")
510509
.with_dependency('raw', '_/[email protected]')
511510
.register_as('_', 'processed_data', '2.0.0', 'data', 'mainnet')
512511
)
@@ -691,7 +690,7 @@ thread.start()
691690
```python
692691
# Always specify full dependency references
693692
query = (
694-
client.query("SELECT * FROM base.data")
693+
client.sql("SELECT * FROM base.data")
695694
.with_dependency('base', '_/[email protected]') # Include version!
696695
)
697696

@@ -700,6 +699,6 @@ query = (
700699

701700
## Next Steps
702701

703-
- See [API Reference](api/admin_api.md) for complete API documentation
702+
- See [API Reference](api/client_api.md) for complete API documentation
704703
- Check [examples/admin/](../examples/admin/) for more code samples
705704
- Review the [Admin API OpenAPI spec](../specs/admin.spec.json) for endpoint details
docs/api/admin_api.md → docs/api/client_api.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Admin API Reference
1+
# Client API Reference
22

33
Complete API reference for the Amp Client, covering query and admin operations.
44

@@ -90,7 +90,7 @@ Access the SchemaClient for schema operations.
9090

9191
#### Methods
9292

93-
##### `query(sql: str) -> QueryBuilder`
93+
##### `sql(sql: str) -> QueryBuilder`
9494

9595
Create a QueryBuilder for the given SQL query.
9696

@@ -103,7 +103,7 @@ Create a QueryBuilder for the given SQL query.
103103
**Example:**
104104

105105
```python
106-
qb = client.query("SELECT * FROM eth.blocks LIMIT 10")
106+
qb = client.sql("SELECT * FROM eth.blocks LIMIT 10")
107107
df = qb.to_pandas()
108108
```
109109

@@ -832,7 +832,7 @@ with_dependency(alias: str, reference: str) -> QueryBuilder
832832

833833
```python
834834
qb = (
835-
client.query("SELECT * FROM eth.blocks")
835+
client.sql("SELECT * FROM eth.blocks")
836836
.with_dependency('eth', '_/eth_firehose@1.0.0')
837837
)
838838
```
@@ -856,7 +856,7 @@ to_manifest(table_name: str, network: str = 'mainnet') -> dict
856856

857857
```python
858858
manifest = (
859-
client.query("SELECT * FROM eth.blocks")
859+
client.sql("SELECT * FROM eth.blocks")
860860
.with_dependency('eth', '_/eth_firehose@1.0.0')
861861
.to_manifest('blocks', 'mainnet')
862862
)
@@ -890,7 +890,7 @@ register_as(
890890

891891
```python
892892
job = (
893-
client.query("SELECT * FROM eth.blocks")
893+
client.sql("SELECT * FROM eth.blocks")
894894
.with_dependency('eth', '_/eth_firehose@1.0.0')
895895
.register_as('_', 'my_dataset', '1.0.0', 'blocks')
896896
.deploy(parallelism=4, wait=True)
@@ -937,7 +937,7 @@ deploy(
937937

938938
```python
939939
# Deploy and return immediately
940-
context = client.query(...).register_as(...)
940+
context = client.sql(...).register_as(...)
941941
job = context.deploy(parallelism=4)
942942
print(f"Started job {job.id}")
943943

@@ -965,7 +965,7 @@ client = Client(
965965

966966
try:
967967
# Build and test query
968-
query = client.query("""
968+
query = client.sql("""
969969
SELECT block_num, hash, timestamp
970970
FROM eth.blocks
971971
WHERE block_num > 1000000

docs/inspecting_datasets.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,10 +183,10 @@ for ds in results.datasets[:5]:
183183

184184
# Step 2: Inspect a dataset
185185
print("\nInspecting dataset structure:")
186-
registry.datasets.inspect('graphops', 'ethereum-mainnet')
186+
registry.datasets.inspect('edgeandnode', 'ethereum-mainnet')
187187

188188
# Step 3: Get schema programmatically
189-
schema = registry.datasets.describe('graphops', 'ethereum-mainnet')
189+
schema = registry.datasets.describe('edgeandnode', 'ethereum-mainnet')
190190

191191
# Step 4: Query based on discovered schema
192192
client = Client(query_url='grpc://your-server:1602', auth=True)

docs/registry-guide.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ client = Client(
2929
query_url='grpc://localhost:1602', # Flight SQL queries
3030
admin_url='http://localhost:8080', # Admin operations
3131
registry_url='https://api.registry.amp.staging.thegraph.com', # Registry (default)
32-
auth=True # Use ~/.amp-cli-config for authentication
32+
auth=True # Use ~/.amp/cache/amp_cli_auth for authentication
3333
)
3434

3535
# Search registry
@@ -43,7 +43,7 @@ manifest = client.registry.datasets.get_manifest(
4343
dataset.latest_version.version_tag
4444
)
4545

46-
client.admin.datasets.register(
46+
client.datasets.register(
4747
namespace=dataset.namespace,
4848
name=dataset.name,
4949
revision=dataset.latest_version.version_tag,
@@ -165,7 +165,7 @@ print(f'Dependencies: {list(manifest.get("dependencies", {}).keys())}')
165165
Publishing requires authentication. Set up your auth token:
166166

167167
```python
168-
# Option 1: Use existing auth from ~/.amp-cli-config
168+
# Option 1: Use existing auth from ~/.amp/cache/amp_cli_auth
169169
from amp import Client
170170
client = Client(auth=True)
171171

@@ -328,21 +328,21 @@ manifest = client.registry.datasets.get_manifest(
328328

329329
# 4. Deploy dependency to local node
330330
print(f'Deploying {dataset.namespace}/{dataset.name}...')
331-
client.admin.datasets.register(
331+
client.datasets.register(
332332
namespace=dataset.namespace,
333333
name=dataset.name,
334334
revision=full_dataset.latest_version.version_tag,
335335
manifest=manifest
336336
)
337337

338-
deploy_response = client.admin.datasets.deploy(
338+
deploy_response = client.datasets.deploy(
339339
dataset.namespace,
340340
dataset.name,
341341
full_dataset.latest_version.version_tag
342342
)
343343

344344
# Wait for deployment
345-
client.admin.jobs.wait_for_completion(deploy_response.job_id)
345+
client.jobs.wait_for_completion(deploy_response.job_id)
346346
print('Dependency deployed!')
347347

348348
# 5. Create derived dataset
@@ -371,15 +371,15 @@ derived_manifest = {
371371
}
372372

373373
# 6. Deploy derived dataset
374-
client.admin.datasets.register(
374+
client.datasets.register(
375375
namespace='_',
376376
name='my_sample',
377377
revision='1.0.0',
378378
manifest=derived_manifest
379379
)
380380

381-
deploy_response = client.admin.datasets.deploy('_', 'my_sample', '1.0.0')
382-
client.admin.jobs.wait_for_completion(deploy_response.job_id)
381+
deploy_response = client.datasets.deploy('_', 'my_sample', '1.0.0')
382+
client.jobs.wait_for_completion(deploy_response.job_id)
383383
print('Derived dataset deployed!')
384384

385385
# 7. Query the data
@@ -480,6 +480,6 @@ registry = RegistryClient(
480480

481481
The Registry client uses the same authentication as the Admin API:
482482

483-
1. Interactive login: `~/.amp-cli-config`
483+
1. Interactive login: `~/.amp/cache/amp_cli_auth`
484484
2. Direct token: Pass `auth_token='your-token'`
485485
3. Unified client: Set `auth=True` to use saved credentials

0 commit comments

Comments
 (0)