@@ -784,25 +784,214 @@ def import_from_cre_csv() -> Any:
784784
785785 # TODO: (spyros) add optional gap analysis and embeddings calculation
786786 database = db .Node_collection ().with_graph ()
787+
787788 file = request .files .get ("cre_csv" )
789+
788790 calculate_embeddings = (
789791 False if not request .args .get ("calculate_embeddings" ) else True
790792 )
791793 calculate_gap_analysis = (
792794 False if not request .args .get ("calculate_gap_analysis" ) else True
793795 )
794796
797+ # ------------------------
798+ # File-level validation
799+ # ------------------------
800+
795801 if file is None :
796- abort (400 , "No file provided" )
802+ return (
803+ jsonify (
804+ {
805+ "success" : False ,
806+ "type" : "FILE_ERROR" ,
807+ "message" : "No file provided" ,
808+ }
809+ ),
810+ 400 ,
811+ )
812+
813+ if not file .filename .lower ().endswith (".csv" ):
814+ return (
815+ jsonify (
816+ {
817+ "success" : False ,
818+ "type" : "FILE_ERROR" ,
819+ "message" : "Only .csv files are supported" ,
820+ }
821+ ),
822+ 400 ,
823+ )
824+
797825 contents = file .read ()
798- csv_read = csv .DictReader (contents .decode ("utf-8" ).splitlines ())
826+
827+ if not contents or contents .strip () == b"" :
828+ return (
829+ jsonify (
830+ {
831+ "success" : False ,
832+ "type" : "FILE_ERROR" ,
833+ "message" : "Uploaded CSV file is empty" ,
834+ }
835+ ),
836+ 400 ,
837+ )
838+
799839 try :
800- documents = spreadsheet_parsers .parse_export_format (list (csv_read ))
840+ decoded_contents = contents .decode ("utf-8" )
841+ except UnicodeDecodeError :
842+ return (
843+ jsonify (
844+ {
845+ "success" : False ,
846+ "type" : "FILE_ERROR" ,
847+ "message" : "CSV file must be UTF-8 encoded" ,
848+ }
849+ ),
850+ 400 ,
851+ )
852+
853+ csv_read = csv .DictReader (decoded_contents .splitlines ())
854+
855+ # ------------------------
856+ # Schema / header validation
857+ # ------------------------
858+
859+ headers = [h .strip () for h in csv_read .fieldnames ]
860+
861+ if not headers :
862+ return (
863+ jsonify (
864+ {
865+ "success" : False ,
866+ "type" : "SCHEMA_ERROR" ,
867+ "message" : "CSV header row is missing" ,
868+ }
869+ ),
870+ 400 ,
871+ )
872+
873+ has_cre_column = any (h .startswith ("CRE" ) for h in headers )
874+ if not has_cre_column :
875+ return (
876+ jsonify (
877+ {
878+ "success" : False ,
879+ "type" : "SCHEMA_ERROR" ,
880+ "message" : "At least one CRE column is required" ,
881+ }
882+ ),
883+ 400 ,
884+ )
885+
886+ required_columns = ["standard|name" , "standard|id" ]
887+ for col in required_columns :
888+ if col not in headers :
889+ return (
890+ jsonify (
891+ {
892+ "success" : False ,
893+ "type" : "SCHEMA_ERROR" ,
894+ "message" : f"Missing required column: { col } " ,
895+ }
896+ ),
897+ 400 ,
898+ )
899+
900+ # ------------------------
901+ # Row-level validation (export-compatible)
902+ # ------------------------
903+
904+ rows = list (csv_read )
905+ errors = []
906+
907+ # 🚨 NEW: guard against misaligned rows (extra columns)
908+ for row_index , row in enumerate (rows , start = 2 ):
909+ if None in row :
910+ return (
911+ jsonify (
912+ {
913+ "success" : False ,
914+ "type" : "SCHEMA_ERROR" ,
915+ "message" : (
916+ f"Row { row_index } has more columns than header. "
917+ "Please ensure the CSV matches the exported template."
918+ ),
919+ }
920+ ),
921+ 400 ,
922+ )
923+
924+ for row_index , row in enumerate (rows , start = 2 ): # header is row 1
925+ normalized_row = {
926+ k : (v .strip () if isinstance (v , str ) else v ) for k , v in row .items ()
927+ }
928+
929+ # Skip completely empty rows (exported templates contain them)
930+ if all (not v for v in normalized_row .values ()):
931+ continue
932+
933+ cre_values = [normalized_row .get (h ) for h in headers if h .startswith ("CRE" )]
934+ cre_values = [v for v in cre_values if v ]
935+
936+ # Rows without CRE are allowed by export format → skip
937+ if not cre_values :
938+ continue
939+
940+ # Validate CRE format
941+ for cre in cre_values :
942+ if "|" not in cre :
943+ errors .append (
944+ {
945+ "row" : row_index ,
946+ "code" : "INVALID_CRE_FORMAT" ,
947+ "message" : (
948+ f"Invalid CRE entry '{ cre } ', expected '<CRE-ID>|<Name>'"
949+ ),
950+ }
951+ )
952+
953+ if errors :
954+ return (
955+ jsonify (
956+ {
957+ "success" : False ,
958+ "type" : "ROW_VALIDATION_ERROR" ,
959+ "errors" : errors ,
960+ }
961+ ),
962+ 400 ,
963+ )
964+
965+ # ------------------------
966+ # No-op import guard (IMPORTANT)
967+ # ------------------------
968+
969+ importable_rows = []
970+ for row in rows :
971+ if any (v for v in row .values ()):
972+ importable_rows .append (row )
973+
974+ if not importable_rows :
975+ return jsonify (
976+ {
977+ "status" : "success" ,
978+ "new_cres" : [],
979+ "new_standards" : 0 ,
980+ }
981+ )
982+
983+ # ------------------------
984+ # Import execution
985+ # ------------------------
986+
987+ try :
988+ documents = spreadsheet_parsers .parse_export_format (importable_rows )
801989 except cre_exceptions .DuplicateLinkException as dle :
802990 abort (500 , f"error during parsing of the incoming CSV, err:{ dle } " )
803- cres = documents .pop (defs .Credoctypes .CRE .value )
804991
992+ cres = documents .pop (defs .Credoctypes .CRE .value )
805993 standards = documents
994+
806995 new_cres = []
807996 for cre in cres :
808997 new_cre , exists = cre_main .register_cre (cre , database )
@@ -816,6 +1005,7 @@ def import_from_cre_csv() -> Any:
8161005 generate_embeddings = calculate_embeddings ,
8171006 calculate_gap_analysis = calculate_gap_analysis ,
8181007 )
1008+
8191009 return jsonify (
8201010 {
8211011 "status" : "success" ,
0 commit comments