Skip to content

Commit 53b85b0

Browse files
feat(myopencre): add export-compatible CSV import validation
- Validate file type, encoding, and required headers
- Accept CSVs generated from CRE catalogue export
- Skip empty and padding rows present in exported templates
- Validate CRE format only when CRE references exist
- Guard against misaligned rows with extra columns
- Return structured validation errors before import

This keeps the importer aligned with the exporter while preventing malformed inputs from causing server errors.
1 parent 288a8b3 commit 53b85b0

File tree

1 file changed

+194
-4
lines changed

1 file changed

+194
-4
lines changed

application/web/web_main.py

Lines changed: 194 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -784,25 +784,214 @@ def import_from_cre_csv() -> Any:
784784

785785
# TODO: (spyros) add optional gap analysis and embeddings calculation
786786
database = db.Node_collection().with_graph()
787+
787788
file = request.files.get("cre_csv")
789+
788790
calculate_embeddings = (
789791
False if not request.args.get("calculate_embeddings") else True
790792
)
791793
calculate_gap_analysis = (
792794
False if not request.args.get("calculate_gap_analysis") else True
793795
)
794796

797+
# ------------------------
798+
# File-level validation
799+
# ------------------------
800+
795801
if file is None:
796-
abort(400, "No file provided")
802+
return (
803+
jsonify(
804+
{
805+
"success": False,
806+
"type": "FILE_ERROR",
807+
"message": "No file provided",
808+
}
809+
),
810+
400,
811+
)
812+
813+
if not file.filename.lower().endswith(".csv"):
814+
return (
815+
jsonify(
816+
{
817+
"success": False,
818+
"type": "FILE_ERROR",
819+
"message": "Only .csv files are supported",
820+
}
821+
),
822+
400,
823+
)
824+
797825
contents = file.read()
798-
csv_read = csv.DictReader(contents.decode("utf-8").splitlines())
826+
827+
if not contents or contents.strip() == b"":
828+
return (
829+
jsonify(
830+
{
831+
"success": False,
832+
"type": "FILE_ERROR",
833+
"message": "Uploaded CSV file is empty",
834+
}
835+
),
836+
400,
837+
)
838+
799839
try:
800-
documents = spreadsheet_parsers.parse_export_format(list(csv_read))
840+
decoded_contents = contents.decode("utf-8")
841+
except UnicodeDecodeError:
842+
return (
843+
jsonify(
844+
{
845+
"success": False,
846+
"type": "FILE_ERROR",
847+
"message": "CSV file must be UTF-8 encoded",
848+
}
849+
),
850+
400,
851+
)
852+
853+
csv_read = csv.DictReader(decoded_contents.splitlines())
854+
855+
# ------------------------
856+
# Schema / header validation
857+
# ------------------------
858+
859+
headers = [h.strip() for h in csv_read.fieldnames]
860+
861+
if not headers:
862+
return (
863+
jsonify(
864+
{
865+
"success": False,
866+
"type": "SCHEMA_ERROR",
867+
"message": "CSV header row is missing",
868+
}
869+
),
870+
400,
871+
)
872+
873+
has_cre_column = any(h.startswith("CRE") for h in headers)
874+
if not has_cre_column:
875+
return (
876+
jsonify(
877+
{
878+
"success": False,
879+
"type": "SCHEMA_ERROR",
880+
"message": "At least one CRE column is required",
881+
}
882+
),
883+
400,
884+
)
885+
886+
required_columns = ["standard|name", "standard|id"]
887+
for col in required_columns:
888+
if col not in headers:
889+
return (
890+
jsonify(
891+
{
892+
"success": False,
893+
"type": "SCHEMA_ERROR",
894+
"message": f"Missing required column: {col}",
895+
}
896+
),
897+
400,
898+
)
899+
900+
# ------------------------
901+
# Row-level validation (export-compatible)
902+
# ------------------------
903+
904+
rows = list(csv_read)
905+
errors = []
906+
907+
# 🚨 NEW: guard against misaligned rows (extra columns)
908+
for row_index, row in enumerate(rows, start=2):
909+
if None in row:
910+
return (
911+
jsonify(
912+
{
913+
"success": False,
914+
"type": "SCHEMA_ERROR",
915+
"message": (
916+
f"Row {row_index} has more columns than header. "
917+
"Please ensure the CSV matches the exported template."
918+
),
919+
}
920+
),
921+
400,
922+
)
923+
924+
for row_index, row in enumerate(rows, start=2): # header is row 1
925+
normalized_row = {
926+
k: (v.strip() if isinstance(v, str) else v) for k, v in row.items()
927+
}
928+
929+
# Skip completely empty rows (exported templates contain them)
930+
if all(not v for v in normalized_row.values()):
931+
continue
932+
933+
cre_values = [normalized_row.get(h) for h in headers if h.startswith("CRE")]
934+
cre_values = [v for v in cre_values if v]
935+
936+
# Rows without CRE are allowed by export format → skip
937+
if not cre_values:
938+
continue
939+
940+
# Validate CRE format
941+
for cre in cre_values:
942+
if "|" not in cre:
943+
errors.append(
944+
{
945+
"row": row_index,
946+
"code": "INVALID_CRE_FORMAT",
947+
"message": (
948+
f"Invalid CRE entry '{cre}', expected '<CRE-ID>|<Name>'"
949+
),
950+
}
951+
)
952+
953+
if errors:
954+
return (
955+
jsonify(
956+
{
957+
"success": False,
958+
"type": "ROW_VALIDATION_ERROR",
959+
"errors": errors,
960+
}
961+
),
962+
400,
963+
)
964+
965+
# ------------------------
966+
# No-op import guard (IMPORTANT)
967+
# ------------------------
968+
969+
importable_rows = []
970+
for row in rows:
971+
if any(v for v in row.values()):
972+
importable_rows.append(row)
973+
974+
if not importable_rows:
975+
return jsonify(
976+
{
977+
"status": "success",
978+
"new_cres": [],
979+
"new_standards": 0,
980+
}
981+
)
982+
983+
# ------------------------
984+
# Import execution
985+
# ------------------------
986+
987+
try:
988+
documents = spreadsheet_parsers.parse_export_format(importable_rows)
801989
except cre_exceptions.DuplicateLinkException as dle:
802990
abort(500, f"error during parsing of the incoming CSV, err:{dle}")
803-
cres = documents.pop(defs.Credoctypes.CRE.value)
804991

992+
cres = documents.pop(defs.Credoctypes.CRE.value)
805993
standards = documents
994+
806995
new_cres = []
807996
for cre in cres:
808997
new_cre, exists = cre_main.register_cre(cre, database)
@@ -816,6 +1005,7 @@ def import_from_cre_csv() -> Any:
8161005
generate_embeddings=calculate_embeddings,
8171006
calculate_gap_analysis=calculate_gap_analysis,
8181007
)
1008+
8191009
return jsonify(
8201010
{
8211011
"status": "success",

0 commit comments

Comments
 (0)