diff --git a/ckanext/iati_generator/actions/iati.py b/ckanext/iati_generator/actions/iati.py
index 9824ed10..15dd84bc 100644
--- a/ckanext/iati_generator/actions/iati.py
+++ b/ckanext/iati_generator/actions/iati.py
@@ -9,11 +9,13 @@
 from ckan.plugins import toolkit
 from okfn_iati import IatiMultiCsvConverter
 from okfn_iati.organisation_xml_generator import IatiOrganisationMultiCsvConverter
+from okfn_iati.csv_validators.folder_validator import CsvFolderValidator
 from werkzeug.datastructures import FileStorage
 
 from ckanext.iati_generator import helpers as h
 from ckanext.iati_generator.models.enums import IATIFileTypes
 from ckanext.iati_generator.models.iati_files import DEFAULT_NAMESPACE, IATIFile
+from .procces import process_validation_failures, upload_or_update_xml_resource
 
 log = logging.getLogger(__name__)
 
@@ -176,6 +178,7 @@ def iati_generate_organisation_xml(context, data_dict):
     required = h.required_organisation_csv_files()
     pre = h.validate_required_csv_folder(Path(tmp_dir), required)
     if pre:
+        log.critical(f"IATI Generation Error (organisation): {dataset} - Details: {pre}")
         # IatiOrganisationMultiCsvConverter will produce an empty organisation.xml file if the input_folder is empty.
         # This it not what we want because the file is useless. For activities this validation is handled by the converter.
         # We check and return error to be coherent with IatiMultiCsvConverter.
@@ -188,6 +191,7 @@ def iati_generate_organisation_xml(context, data_dict):
     if not success:
         # Use the CKAN ValidationError formar for errors
         validation_errors = {'Organizacion XML errors': converter.latest_errors}
+        log.critical(f"IATI Generation Error (organisation): {dataset} - Details: {validation_errors}")
         log.warning("Error when generating the organisation.xml file.")
         raise toolkit.ValidationError(
             {"error_org_xml": validation_errors}
@@ -279,67 +283,54 @@ def iati_generate_activities_xml(context, data_dict):
     toolkit.check_access("iati_generate_xml_files", context, data_dict)
 
     package_id = toolkit.get_or_bust(data_dict, "package_id")
-    dataset = toolkit.get_action('package_show')({}, {"id": package_id})
+    dataset = toolkit.get_action("package_show")({}, {"id": package_id})
 
     tmp_dir = tempfile.mkdtemp()
-
     _prepare_activities_csv_folder(dataset, tmp_dir)
 
     required = h.required_activity_csv_files()
-    pre = h.validate_required_csv_folder(Path(tmp_dir), required)
-    if pre:
-        raise toolkit.ValidationError(pre)
+    pre_check = h.validate_required_csv_folder(Path(tmp_dir), required)
+    if pre_check:
+        log.critical(f"IATI Generation Error (activity): {dataset} - Details: {pre_check}")
+        raise toolkit.ValidationError(pre_check)
+
+    result = CsvFolderValidator().validate_folder(tmp_dir)
+
+    if not result.is_valid:
+        normalized_errors = process_validation_failures(dataset, result.issues)
+        log.critical(f"IATI Generation Error (activity): {dataset} - Details: {normalized_errors}")
+        raise toolkit.ValidationError({"error_activity_xml": normalized_errors})
 
     output_path = tmp_dir + "/activity.xml"
     converter = IatiMultiCsvConverter()
-    success = converter.csv_folder_to_xml(csv_folder=tmp_dir, xml_output=output_path, validate_output=True)
+    success = converter.csv_folder_to_xml(csv_folder=tmp_dir, xml_output=output_path)
 
+    errors = {"error_activity_xml": {"Activity XML errors": converter.latest_errors}}
     if not success:
-        log.warning(f"Could not generate activity file for dataset {dataset['name']} ({dataset['id']})")
-        validation_errors = {'Activity XML errors': converter.latest_errors}
-        # Is this the best way to handle this scenario?
-        raise toolkit.ValidationError(
-            {"error_activity_xml": validation_errors}
+        log.critical(
+            f"IATI Generation Error (activity): Could not generate activity file for dataset {dataset['name']} | "
+            f"{dataset} - Details: {errors}"
         )
-
-    activity_resource = None
-    for res in dataset["resources"]:
-        if int(res["iati_file_type"]) == IATIFileTypes.FINAL_ACTIVITY_FILE.value:
-            activity_resource = res
-            break
-
-    # Using werkzeug FileStorage is the only way I found to get the resource_create action working.
-    with open(output_path, "rb") as f:
-        stream = io.BytesIO(f.read())
-    upload = FileStorage(stream=stream, filename="activity.xml")
-
-    res_dict = {
-        "name": "activity.xml",
-        "url_type": "upload",
-        "upload": upload,
-        "iati_file_type": IATIFileTypes.FINAL_ACTIVITY_FILE.value,
-        "format": "XML",
-    }
-    if activity_resource:
-        res_dict["id"] = activity_resource["id"]
-        result = toolkit.get_action("resource_patch")({}, res_dict)
-        log.info(f"Patched activity.xml resource {result['id']}.")
-    else:
-        res_dict["package_id"] = dataset["id"]
-        result = toolkit.get_action("resource_create")({}, res_dict)
-        log.info(f"Created new activity.xml resource with id {result['id']}.")
+        raise toolkit.ValidationError(errors)
+
+    result_resource = upload_or_update_xml_resource(
+        context,
+        dataset,
+        output_path,
+        "activity.xml",
+        IATIFileTypes.FINAL_ACTIVITY_FILE,
+    )
 
     namespace = h.normalize_namespace(dataset.get("iati_namespace", DEFAULT_NAMESPACE))
-
     h.upsert_final_iati_file(
-        resource_id=result["id"],
+        resource_id=result_resource["id"],
         namespace=namespace,
         file_type=IATIFileTypes.FINAL_ACTIVITY_FILE.value,
         success=True,
     )
-
     shutil.rmtree(tmp_dir)
-    return result
+
+    return result_resource
 
 
 def iati_get_dataset_by_namespace(context, data_dict):
diff --git a/ckanext/iati_generator/actions/procces.py b/ckanext/iati_generator/actions/procces.py
new file mode 100644
index 00000000..87175008
--- /dev/null
+++ b/ckanext/iati_generator/actions/procces.py
@@ -0,0 +1,77 @@
+import io
+import logging
+
+from ckan.plugins import toolkit
+
+from werkzeug.datastructures import FileStorage
+
+from ckanext.iati_generator import helpers as h
+from ckanext.iati_generator.models.enums import CSV_FILENAME_TO_FILE_TYPE
+from ckanext.iati_generator.models.iati_files import DEFAULT_NAMESPACE
+
+log = logging.getLogger(__name__)
+
+
+def process_validation_failures(dataset, validation_issues):
+    """
+    Identifies which resources failed and updates their status in the database (IATIFile).
+    Returns the normalized issues ready to be used in ValidationError.
+    """
+    failed_files_map = {}
+    for issue in validation_issues:
+        fname = issue.file_name
+        if fname and fname not in failed_files_map:
+            failed_files_map[fname] = issue.message
+
+    namespace = h.normalize_namespace(dataset.get("iati_namespace", DEFAULT_NAMESPACE))
+    files_by_res = h.iati_files_by_resource(namespace=namespace)
+
+    for resource in dataset.get("resources", []):
+        file_type = str(resource.get("iati_file_type", ""))
+
+        for fname, error_msg in failed_files_map.items():
+            target_type = CSV_FILENAME_TO_FILE_TYPE.get(fname)
+            if target_type and target_type == file_type:
+                res_id = resource['id']
+                if res_id in files_by_res:
+                    files_by_res[res_id].track_processing(
+                        success=False,
+                        error_message=error_msg
+                    )
+    return h.normalize_iati_errors(validation_issues)
+
+
+def upload_or_update_xml_resource(context, dataset, file_path, file_name, file_type_enum):
+    """
+    Uploads the generated XML file to CKAN.
+    If the dataset already has a resource whose iati_file_type matches file_type_enum, it is patched.
+    If not, a new resource is created. Returns the resource dict given back by the CKAN action.
+    """
+    existing_resource = None
+    for res in dataset.get("resources", []):
+        if str(res.get("iati_file_type", "")) == str(file_type_enum.value):
+            existing_resource = res
+            break
+
+    with open(file_path, "rb") as f:
+        stream = io.BytesIO(f.read())
+    upload = FileStorage(stream=stream, filename=file_name)
+
+    res_dict = {
+        "name": file_name,
+        "url_type": "upload",
+        "upload": upload,
+        "iati_file_type": file_type_enum.value,
+        "format": "XML",
+    }
+
+    if existing_resource:
+        res_dict["id"] = existing_resource["id"]
+        result = toolkit.get_action("resource_patch")({}, res_dict)
+        log.info(f"Patched {file_name} resource {result['id']}.")
+    else:
+        res_dict["package_id"] = dataset["id"]
+        result = toolkit.get_action("resource_create")({}, res_dict)
+        log.info(f"Created new {file_name} resource with id {result['id']}.")
+
+    return result
diff --git a/ckanext/iati_generator/helpers.py b/ckanext/iati_generator/helpers.py
index 89415daf..ebec6137 100644
--- a/ckanext/iati_generator/helpers.py
+++ b/ckanext/iati_generator/helpers.py
@@ -1,3 +1,4 @@
+import json
 import logging
 from pathlib import Path
 from typing import Any, Dict, List, Optional
@@ -11,6 +12,7 @@
 
 from okfn_iati import IatiMultiCsvConverter
 from okfn_iati.organisation_xml_generator import IatiOrganisationMultiCsvConverter
+from okfn_iati.csv_validators.models import ValidationIssue
 
 log = logging.getLogger(__name__)
 
@@ -748,13 +750,63 @@ def _deduplicate_errors(normalized: List[Dict[str, Any]]) -> List[Dict[str, Any]
     return deduped
 
 
+def _normalize_validation_issues(error_list: List[Any]) -> List[Dict[str, Any]]:
+    """Helper to process list of errors (ValidationIssue objects)."""
+    normalized = []
+    for err in error_list:
+        if hasattr(err, 'message'):
+            row = getattr(err, 'row_number', getattr(err, 'line', None))
+            col = getattr(err, 'column_name', getattr(err, 'column', None))
+
+            item = {
+                "severity": "error",
+                "category": "csv-content",
+                "title": toolkit._("Error in %(file_name)s") % {"file_name": err.file_name}
+                if getattr(err, 'file_name', None) else toolkit._("Validation error"),
+                "details": err.message,
+                "csv_file": getattr(err, 'file_name', None),
+                "location": {"line": row, "col": col} if row else None,
+                "suggestion": toolkit._("Check the format of the uploaded file."),
+                "raw": str(err)
+            }
+            normalized.append(item)
+        else:
+            # Fallback for simple strings inside a list
+            parsed = _parse_schema_error_line(str(err))
+            normalized.append(_normalize_single_error(str(err), parsed))
+    return normalized
+
+
+def _format_raw_errors_as_json(error_dict: Any) -> str:
+    """Helper to dump errors to a pretty JSON string."""
+    def _json_default(obj):
+        """Converts complex objects (ValidationIssue, Enums) to dictionaries/strings."""
+        if isinstance(obj, ValidationIssue):
+            return {
+                "level": getattr(obj.level, 'value', str(obj.level)) if hasattr(obj, 'level') else None,
+                "code": getattr(obj.code, 'value', str(obj.code)) if hasattr(obj, 'code') else None,
+                "message": obj.message,
+                "file_name": getattr(obj, 'file_name', None),
+                "row": getattr(obj, 'row_number', None),
+                "column": getattr(obj, 'column_name', None),
+                "value": getattr(obj, 'value', None)
+            }
+        if hasattr(obj, 'value'):
+            return obj.value
+        return str(obj)
+
+    try:
+        return json.dumps(error_dict, default=_json_default, indent=4, ensure_ascii=False)
+    except Exception:
+        return str(error_dict)
+
+
 def normalize_iati_errors(error_dict: Any, package_id: Optional[str] = None) -> Dict[str, Any]:
     """
     It normalizes converter errors (XSD / latest_errors) into a user-friendly structure.
     It also supports pre-normalized structures (e.g., the output of validate_required_csv_folder()).
+
     """
-
-    # If already normalized (pre-check), return it as is
     if isinstance(error_dict, dict) and "items" in error_dict and "raw" in error_dict:
         if "summary" not in error_dict or error_dict["summary"] is None:
             error_dict["summary"] = toolkit._(
@@ -762,22 +814,21 @@ def normalize_iati_errors(error_dict: Any, package_id: Optional[str] = None) ->
             )
         return error_dict
 
-    raw_lines = _flatten_error_dict(error_dict)
     normalized = []
-    for raw in raw_lines:
-        parsed = _parse_schema_error_line(raw)
-        item = _normalize_single_error(raw, parsed)
-        normalized.append(item)
-    deduped = _deduplicate_errors(normalized)
+    if isinstance(error_dict, list):
+        normalized = _normalize_validation_issues(error_dict)
+    elif isinstance(error_dict, dict):
+        raw_lines = _flatten_error_dict(error_dict)
+        for raw in raw_lines:
+            parsed = _parse_schema_error_line(raw)
+            normalized.append(_normalize_single_error(raw, parsed))
 
-    summary = toolkit._(
-        "The XML could not be generated due to validation errors in the source CSV files."
-    ) if deduped else None
-
+    deduped = _deduplicate_errors(normalized)
+    raw_formatted = _format_raw_errors_as_json(error_dict)
     return {
-        "summary": summary,
+        "summary": toolkit._("Validation errors were found in the source CSV files.") if deduped else None,
         "items": deduped,
-        "raw": raw_lines,
+        "raw": [raw_formatted],
     }
 
 
diff --git a/ckanext/iati_generator/i18n/es/LC_MESSAGES/ckanext-iati-generator.mo b/ckanext/iati_generator/i18n/es/LC_MESSAGES/ckanext-iati-generator.mo
index 4ba8af18..6b63ed3f 100644
Binary files a/ckanext/iati_generator/i18n/es/LC_MESSAGES/ckanext-iati-generator.mo and b/ckanext/iati_generator/i18n/es/LC_MESSAGES/ckanext-iati-generator.mo differ
diff --git a/ckanext/iati_generator/i18n/es/LC_MESSAGES/ckanext-iati-generator.po b/ckanext/iati_generator/i18n/es/LC_MESSAGES/ckanext-iati-generator.po
index 79f340b9..ba40830f 100644
--- a/ckanext/iati_generator/i18n/es/LC_MESSAGES/ckanext-iati-generator.po
+++ b/ckanext/iati_generator/i18n/es/LC_MESSAGES/ckanext-iati-generator.po
@@ -126,23 +126,30 @@ msgstr "Tipo de dato inválido"
 msgid "The value '%(value)s' is not of the correct type for '%(element)s'."
 msgstr "El valor '%(value)s' no es del tipo correcto para '%(element)s'."
 
-#: ckanext/iati_generator/helpers.py:716
-#, fuzzy
+#: ckanext/iati_generator/helpers.py:718
+#: ckanext/iati_generator/helpers.py:765
 msgid "Validation error"
-msgstr "Errores al compilar IATI"
-#: ckanext/iati_generator/helpers.py:718
+msgstr "Error de validación"
+
+#: ckanext/iati_generator/helpers.py:720
 msgid "Check the required CSV files and their format."
 msgstr "Verifica los archivos CSV requeridos y su formato."
 
-#: ckanext/iati_generator/helpers.py:760
+#: ckanext/iati_generator/helpers.py:813
 msgid "The XML could not be generated due to errors in the source CSV files."
 msgstr "No se pudo generar el XML debido a errores en los archivos CSV fuente."
 
-#: ckanext/iati_generator/helpers.py:775
-msgid ""
-"The XML could not be generated due to validation errors in the source CSV"
-" files."
-msgstr "No se pudo generar el XML debido a errores de validación en los archivos CSV fuente."
+#: ckanext/iati_generator/helpers.py:764
+msgid "Error in %(file_name)s"
+msgstr "Error en %(file_name)s"
+
+#: ckanext/iati_generator/helpers.py:769
+msgid "Check the format of the uploaded file."
+msgstr "Verifica el formato del archivo subido."
+
+#: ckanext/iati_generator/helpers.py:831
+msgid "Validation errors were found in the source CSV files."
+msgstr "Se encontraron errores de validación en los archivos CSV fuente."
 
 #: ckanext/iati_generator/auth/iati.py:82
 msgid "Only organization admins (or sysadmins) can perform this action."
diff --git a/ckanext/iati_generator/models/enums.py b/ckanext/iati_generator/models/enums.py
index 27481dba..fc901c5f 100644
--- a/ckanext/iati_generator/models/enums.py
+++ b/ckanext/iati_generator/models/enums.py
@@ -35,3 +35,28 @@ class IATIFileTypes(Enum):
     ACTIVITY_DESCRIPTIONS_FILE = 340  # descriptions.csv
     ACTIVITY_COUNTRY_BUDGET_ITEMS_FILE = 350  # country_budget_items.csv
     FINAL_ACTIVITY_FILE = 299
+
+
+# Mapping from activity file type enum values to their CSV filenames.
+# Keep in sync with IatiMultiCsvConverter.csv_files in okfn_iati.
+ACTIVITY_CSV_FILENAMES = {
+    IATIFileTypes.ACTIVITY_MAIN_FILE: "activities.csv",
+    IATIFileTypes.ACTIVITY_PARTICIPATING_ORGS_FILE: "participating_orgs.csv",
+    IATIFileTypes.ACTIVITY_SECTORS_FILE: "sectors.csv",
+    IATIFileTypes.ACTIVITY_BUDGET_FILE: "budgets.csv",
+    IATIFileTypes.ACTIVITY_TRANSACTIONS_FILE: "transactions.csv",
+    IATIFileTypes.ACTIVITY_TRANSACTION_SECTORS_FILE: "transaction_sectors.csv",
+    IATIFileTypes.ACTIVITY_LOCATIONS_FILE: "locations.csv",
+    IATIFileTypes.ACTIVITY_DOCUMENTS_FILE: "documents.csv",
+    IATIFileTypes.ACTIVITY_RESULTS_FILE: "results.csv",
+    IATIFileTypes.ACTIVITY_INDICATORS_FILE: "indicators.csv",
+    IATIFileTypes.ACTIVITY_INDICATOR_PERIODS_FILE: "indicator_periods.csv",
+    IATIFileTypes.ACTIVITY_DATES_FILE: "activity_date.csv",
+    IATIFileTypes.ACTIVITY_CONTACT_INFO_FILE: "contact_info.csv",
+    IATIFileTypes.ACTIVITY_CONDITIONS_FILE: "conditions.csv",
+    IATIFileTypes.ACTIVITY_DESCRIPTIONS_FILE: "descriptions.csv",
+    IATIFileTypes.ACTIVITY_COUNTRY_BUDGET_ITEMS_FILE: "country_budget_items.csv",
+}
+
+# Reverse mapping: CSV filename -> enum value (as string), for matching validation issues
+CSV_FILENAME_TO_FILE_TYPE = {fname: str(ft.value) for ft, fname in ACTIVITY_CSV_FILENAMES.items()}
diff --git a/requirements.txt b/requirements.txt
index decfbfdb..0247c308 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 # PyPi is broken. Probably due to missing static files in the distribution package.
 # TODO: Fix this
-git+https://github.com/okfn/okfn_iati.git@0.4.4#egg=okfn_iati
\ No newline at end of file
+git+https://github.com/okfn/okfn_iati.git@0.5.0#egg=okfn_iati
\ No newline at end of file