Skip to content

Commit ec449bd

Browse files
mesejoclaude
andcommitted
feat(catalog): add schema command to inspect entry schemas without execution
Adds `xorq catalog schema <name-or-alias>` command that prints Schema In (for unbound/partial expressions) and Schema Out (for all entries) by reading expr.yaml directly from the tgz — no backend connection required. - CatalogEntry.schema_out / .schema_in cached properties parse expr.yaml - CatalogEntry._parse_schema_* staticmethods handle dtype dict → str conversion - Catalog.get_entry() resolves both entry names and aliases - --json flag for machine-readable output - Shell completion covers both names and aliases Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent b8a6db9 commit ec449bd

File tree

3 files changed

+212
-0
lines changed

3 files changed

+212
-0
lines changed

python/xorq/catalog/catalog.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,14 @@ def get_catalog_entry(self, name):
160160
catalog_entry = CatalogEntry(name, self)
161161
return catalog_entry
162162

163+
def get_entry(self, name_or_alias):
164+
"""Resolve an entry name or alias to a CatalogEntry."""
165+
if name_or_alias in self.list():
166+
return CatalogEntry(name_or_alias, self)
167+
if name_or_alias in self.list_aliases():
168+
return CatalogAlias.from_name(name_or_alias, self).catalog_entry
169+
raise KeyError(f"No entry or alias named {name_or_alias!r}")
170+
163171
def get_tgz(self, name, dir_path=None):
164172
catalog_entry = self.get_catalog_entry(name)
165173
return catalog_entry.get(dir_path)
@@ -470,6 +478,104 @@ def kind(self) -> str:
470478
return default_value
471479
return data.get("kind", default_value)
472480

481+
@cached_property
482+
def schema_out(self) -> dict | None:
483+
"""Output schema as {column_name: dtype_str}, read directly from expr.yaml."""
484+
data = self._read_tgz_yaml(DumpFiles.expr)
485+
return CatalogEntry._parse_schema_out(data)
486+
487+
@cached_property
488+
def schema_in(self) -> dict | None:
489+
"""For unbound expressions, the expected input schema as {column_name: dtype_str}."""
490+
if self.kind != str(ExprKind.UnboundExpr):
491+
return None
492+
data = self._read_tgz_yaml(DumpFiles.expr)
493+
return CatalogEntry._parse_schema_in(data)
494+
495+
@staticmethod
496+
def _resolve_schema_ref(ref_val) -> str | None:
497+
"""Extract the schema key string from a schema_ref value.
498+
499+
In expr.yaml, schema_ref is stored as {'schema_ref': 'schema_<hash>'}.
500+
"""
501+
if isinstance(ref_val, dict):
502+
return ref_val.get("schema_ref")
503+
if isinstance(ref_val, str):
504+
return ref_val
505+
return None
506+
507+
@staticmethod
508+
def _dtype_dict_to_str(d) -> str:
509+
"""Convert a serialized DataType dict from expr.yaml to a human-readable string."""
510+
import xorq.vendor.ibis.expr.datatypes as dt # noqa: PLC0415
511+
512+
if not isinstance(d, dict) or d.get("op") != "DataType":
513+
return str(d)
514+
typ_cls = getattr(dt, d["type"], None)
515+
if typ_cls is None:
516+
return d["type"].lower()
517+
kwargs = {
518+
k: CatalogEntry._dtype_dict_to_str(v) if isinstance(v, dict) else v
519+
for k, v in d.items()
520+
if k not in ("op", "type")
521+
}
522+
523+
# Re-parse nested dtype strings back to dt objects for the constructor
524+
def _parse_val(k, v):
525+
try:
526+
arg_annotation = typ_cls.__dataclass_fields__.get(k)
527+
if arg_annotation and isinstance(v, str):
528+
return dt.dtype(v)
529+
except Exception:
530+
pass
531+
return v
532+
533+
kwargs = {k: _parse_val(k, v) for k, v in kwargs.items()}
534+
try:
535+
return str(typ_cls(**kwargs))
536+
except Exception:
537+
return d["type"].lower()
538+
539+
@staticmethod
540+
def _schema_dict_to_str_dict(schema_dict) -> dict[str, str]:
541+
return {
542+
col: CatalogEntry._dtype_dict_to_str(dtype_val)
543+
for col, dtype_val in schema_dict.items()
544+
}
545+
546+
@staticmethod
547+
def _parse_schema_out(data) -> dict | None:
548+
if not isinstance(data, dict):
549+
return None
550+
schemas = data.get("definitions", {}).get("schemas", {})
551+
schema_ref = CatalogEntry._resolve_schema_ref(
552+
data.get("expression", {}).get("schema_ref")
553+
)
554+
if not schema_ref:
555+
return None
556+
schema_dict = schemas.get(schema_ref)
557+
if not isinstance(schema_dict, dict):
558+
return None
559+
return CatalogEntry._schema_dict_to_str_dict(schema_dict)
560+
561+
@staticmethod
562+
def _parse_schema_in(data) -> dict | None:
563+
if not isinstance(data, dict):
564+
return None
565+
definitions = data.get("definitions", {})
566+
nodes = definitions.get("nodes", {})
567+
schemas = definitions.get("schemas", {})
568+
for node_def in nodes.values():
569+
if isinstance(node_def, dict) and node_def.get("op") == "UnboundTable":
570+
schema_ref = CatalogEntry._resolve_schema_ref(
571+
node_def.get("schema_ref")
572+
)
573+
if schema_ref:
574+
schema_dict = schemas.get(schema_ref)
575+
if isinstance(schema_dict, dict):
576+
return CatalogEntry._schema_dict_to_str_dict(schema_dict)
577+
return None
578+
473579
@cached_property
474580
def backends(self) -> tuple:
475581
data = self._read_tgz_yaml(DumpFiles.profiles)

python/xorq/catalog/cli.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,63 @@ def clone(url, dest_name, dest_path):
351351
click.echo(f"Cloned to {catalog.repo_path}")
352352

353353

354+
def _complete_entry_or_alias_names(ctx, param, incomplete):
355+
from click.shell_completion import CompletionItem
356+
357+
try:
358+
catalog = _make_catalog_for_completion(ctx)
359+
names = set(catalog.list()) | set(catalog.list_aliases())
360+
return [CompletionItem(n) for n in sorted(names) if n.startswith(incomplete)]
361+
except Exception:
362+
return []
363+
364+
365+
@cli.command()
366+
@click.argument("name", shell_complete=_complete_entry_or_alias_names)
367+
@click.option("--json", "as_json", is_flag=True, default=False, help="Output as JSON.")
368+
@click.pass_context
369+
def schema(ctx, name, as_json):
370+
"""Show schema of a catalog entry (name or alias)."""
371+
import json as json_mod
372+
373+
from xorq.ibis_yaml.compiler import ExprKind
374+
375+
with click_context_catalog(ctx):
376+
catalog = ctx.obj.make_catalog(init=False)
377+
try:
378+
entry = catalog.get_entry(name)
379+
except KeyError as e:
380+
raise click.ClickException(str(e)) from e
381+
382+
kind = entry.kind
383+
schema_out = entry.schema_out
384+
schema_in = entry.schema_in
385+
386+
if as_json:
387+
result = {"name": entry.name, "kind": kind, "schema_out": schema_out}
388+
if schema_in is not None:
389+
result["schema_in"] = schema_in
390+
click.echo(json_mod.dumps(result, indent=2))
391+
return
392+
393+
is_unbound = kind == str(ExprKind.UnboundExpr)
394+
type_label = "Partial (unbound)" if is_unbound else "Source (bound)"
395+
click.echo(f"Type: {type_label}\n")
396+
397+
if schema_in:
398+
click.echo("Schema In:")
399+
for col, dtype in schema_in.items():
400+
click.echo(f" {col:<24} {dtype}")
401+
click.echo()
402+
403+
if schema_out:
404+
click.echo("Schema Out:")
405+
for col, dtype in schema_out.items():
406+
click.echo(f" {col:<24} {dtype}")
407+
else:
408+
click.echo("Schema Out: (unavailable)")
409+
410+
354411
@cli.command()
355412
@click.pass_context
356413
def check(ctx):

python/xorq/catalog/tests/test_catalog.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,3 +349,52 @@ def test_extract_kind_partial(catalog):
349349
expr = t.filter(t.a > 0)
350350
entry = catalog.add(expr)
351351
assert entry.kind == ExprKind.UnboundExpr
352+
353+
354+
def test_schema_out_bound(catalog):
355+
expr = xo.memtable({"col_a": [1, 2], "col_b": ["x", "y"]})
356+
entry = catalog.add(expr)
357+
assert entry.schema_out == {"col_a": "int64", "col_b": "string"}
358+
359+
360+
def test_schema_in_none_for_bound(catalog):
361+
expr = xo.memtable({"a": [1, 2, 3]})
362+
entry = catalog.add(expr)
363+
assert entry.schema_in is None
364+
365+
366+
def test_schema_out_unbound(catalog):
367+
t = xo.table(schema={"amount": "float64", "currency": "string"})
368+
expr = t.mutate(amount_usd=t.amount * 1.2)
369+
entry = catalog.add(expr)
370+
assert entry.schema_out == {
371+
"amount": "float64",
372+
"currency": "string",
373+
"amount_usd": "float64",
374+
}
375+
376+
377+
def test_schema_in_unbound(catalog):
378+
t = xo.table(schema={"amount": "float64", "currency": "string"})
379+
expr = t.filter(t.amount > 0)
380+
entry = catalog.add(expr)
381+
assert entry.schema_in == {"amount": "float64", "currency": "string"}
382+
383+
384+
def test_get_entry_by_name(catalog):
385+
expr = xo.memtable({"x": [1]})
386+
entry = catalog.add(expr)
387+
resolved = catalog.get_entry(entry.name)
388+
assert resolved.name == entry.name
389+
390+
391+
def test_get_entry_by_alias(catalog):
392+
expr = xo.memtable({"x": [1]})
393+
entry = catalog.add(expr, aliases=("my-alias",))
394+
resolved = catalog.get_entry("my-alias")
395+
assert resolved.name == entry.name
396+
397+
398+
def test_get_entry_unknown_raises(catalog):
399+
with pytest.raises(KeyError, match="no-such"):
400+
catalog.get_entry("no-such")

0 commit comments

Comments
 (0)