Skip to content

Commit cf33196

Browse files
Create file_loader.py
1 parent 1466e21 commit cf33196

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

src/file_loader.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# src/file_loader.py
2+
from pathlib import Path
3+
import pandas as pd
4+
import json
5+
import yaml
6+
7+
# Location of the YAML schema file that _read_schema() loads. The path is
# relative, so it is resolved against the current working directory.
SCHEMA_PATH = Path("schema/aura_schema.yaml")
8+
9+
10+
def _read_schema():
    """Parse the schema file at ``SCHEMA_PATH`` and return the result.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.

    Raises:
        FileNotFoundError: If ``SCHEMA_PATH`` does not exist.
    """
    if SCHEMA_PATH.exists():
        with SCHEMA_PATH.open("r", encoding="utf-8") as handle:
            return yaml.safe_load(handle)
    raise FileNotFoundError("schema/aura_schema.yaml not found. Add schema before running validation.")
15+
16+
17+
def load_workbook(path: str):
    """Load an Excel workbook and return its sheets as DataFrames.

    Both the standard ``.xlsx`` extension and the branded ``.xlsl``
    extension are accepted; no extension-specific handling is applied.

    Args:
        path: Filesystem path of the workbook.

    Returns:
        A dict mapping each sheet name to the DataFrame parsed from it.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(path)
    # Use ExcelFile as a context manager so the underlying file handle is
    # closed as soon as every sheet has been parsed.
    with pd.ExcelFile(p) as xl:
        return {name: xl.parse(name) for name in xl.sheet_names}
30+
31+
32+
def validate_workbook(path: str):
    """Validate a workbook against the schema.

    For every sheet declared under the schema's ``sheets`` mapping:
    a missing sheet marked ``required`` is reported as a failure, a missing
    optional sheet is skipped without a report entry, and a present sheet is
    checked for each column marked ``required``.

    Args:
        path: Filesystem path of the workbook to validate.

    Returns:
        A tuple ``(ok, report)``. ``ok`` is ``True`` only when no check
        failed. ``report`` is a dict with the keys ``"file"`` (``str(path)``)
        and ``"checks"`` (a list with one dict per reported sheet).

    Raises:
        FileNotFoundError: If the schema file or the workbook is missing.
    """
    # An empty schema file parses to None, as does any YAML key without a
    # value; treat each of those as "nothing declared" instead of crashing.
    schema = _read_schema() or {}
    sheets = load_workbook(path)
    report = {
        "file": str(path),
        "checks": [],
    }
    ok = True

    for sheet_name, spec in (schema.get("sheets") or {}).items():
        spec = spec or {}
        if sheet_name not in sheets:
            if spec.get("required", False):
                ok = False
                report["checks"].append({"sheet": sheet_name, "ok": False, "reason": "missing required sheet"})
            # Absent optional sheets produce no report entry.
            continue

        df = sheets[sheet_name]
        # Only columns flagged as required count as failures when absent.
        missing_cols = [
            col["name"]
            for col in spec.get("columns") or []
            if col["name"] not in df.columns and col.get("required", False)
        ]
        if missing_cols:
            ok = False
            report["checks"].append({"sheet": sheet_name, "ok": False, "missing_columns": missing_cols})
        else:
            report["checks"].append({"sheet": sheet_name, "ok": True})
    return ok, report
65+
66+
67+
if __name__ == "__main__":
    import sys

    # The first command-line argument is the workbook to validate; without
    # one, the default path below is used.
    cli_args = sys.argv[1:]
    target = cli_args[0] if cli_args else "data/Aura.xlsl"
    is_valid, result = validate_workbook(target)
    if is_valid:
        print("VALID:")
    else:
        print("INVALID:")
    print(json.dumps(result, indent=2))

0 commit comments

Comments
 (0)