diff --git a/.github/non_browsable_doc_map.json b/.github/non_browsable_doc_map.json new file mode 100644 index 00000000..fa661b24 --- /dev/null +++ b/.github/non_browsable_doc_map.json @@ -0,0 +1,10 @@ +{ + "_comment": "This file maps non-browsable QMD source files to their persistent random output names and URLs. Do not publish this file.", + "mappings": [ + { + "source": "it-principles/IT_Architecture_Principles_and_Implementation_Guidelines.qmd", + "base": "smnkbw05zgz5l0llz5tefwxrowqqds6c36zi1i9ssswbslnleuj28yc5c7sui5pl", + "url": "/smnkbw05zgz5l0llz5tefwxrowqqds6c36zi1i9ssswbslnleuj28yc5c7sui5pl.html" + } + ] +} \ No newline at end of file diff --git a/.github/scripts/build-docs.sh b/.github/scripts/build-docs.sh index f7f852fe..f86c31ee 100755 --- a/.github/scripts/build-docs.sh +++ b/.github/scripts/build-docs.sh @@ -1,51 +1,89 @@ #!/bin/bash set -e -echo "🖼 Render all documents into to HTML/DOCX" +echo "🔄 Copying DOCS to origin_DOCS..." +mv DOCS origin_DOCS + +echo "🔄 Updating URL mappings..." +python3 .github/scripts/update_url_mappings.py + +echo "🔄 Grouping documents by category..." +python3 .github/scripts/group_docs_by_category.py + +# Change to DOCS directory as it'll be the root of rendered content +cd DOCS +# Link assets to origin_DOCS as these files need to be served from rendered content +ln -s ../assets assets + + +echo "🖼 Render all documents to HTML" +if [[ -n "$SKIP_DOCX" ]]; then + echo " (DOCX generation will be skipped)" +else + echo " (DOCX generation will follow)" +fi sudo cp /usr/bin/chromium /usr/bin/chromium-browser -QUARTO_CHROMIUM_HEADLESS_MODE=new quarto render --to docx -find _site -type f -name 'index.docx' -delete + +# Set up quarto configuration based on environment variables +if [[ -n "$SKIP_DOCX" ]]; then + echo "📄 Using configuration for DOCX cleanup (HTML only)" + cp _quarto-no-headers.yml _quarto.yml +else + echo "📄 Using default configuration (HTML + DOCX files kept)" + # _quarto.yml is already the default with headers - no copying needed +fi + QUARTO_CHROMIUM_HEADLESS_MODE=new quarto render --to html --no-clean # Backup the correct sitemap as it may be overwritten by next operations sleep 5 mv _site/sitemap.xml _site/sitemap.xml.bkp +# Generate DOCX files (always needed for PDF conversion) +echo "📄 Generating DOCX files for PDF conversion..." +QUARTO_CHROMIUM_HEADLESS_MODE=new quarto render --to docx --no-clean +find _site -type f -name 'index.docx' -delete + echo "🛠 Generate index.qmd files for all DOCS/* folders"e -node .github/scripts/generate_index_all.mjs +node ../.github/scripts/generate_index_all.mjs echo "📄 Render only index.qmd files using 'index' profile" mv _quarto.yml _quarto_not_used.yml mv _quarto-index.yml _quarto.yml -find DOCS -type f -name index.qmd -print0 | while IFS= read -r -d '' src; do +find ./ -type f -name index.qmd -print0 | while IFS= read -r -d '' src; do echo "🔧 Rendering $src using profile=index..." - QUARTO_CHROMIUM_HEADLESS_MODE=new quarto render "$src" --profile index --to html --no-clean + QUARTO_CHROMIUM_HEADLESS_MODE=new quarto render "$src" --profile index --to html --no-clean $QUARTO_FLAGS done mv _quarto.yml _quarto-index.yml cp _quarto_not_used.yml _quarto.yml && rm _quarto_not_used.yml -echo "🔄 Additional processing of index.html file" -echo ' - -
- -If you are not redirected automatically, click here.
- -' > _site/index.html + +echo "📄 Converting .docx files to .pdf..." +timeout 3s ../.github/scripts/convert_docx_to_pdf.sh || true +timeout 10m ../.github/scripts/convert_docx_to_pdf.sh + +# Clean up DOCX files if requested (they're only needed for PDF conversion) +if [[ -n "$SKIP_DOCX" ]]; then + echo "🗑️ Cleaning up DOCX files (keeping only PDFs and HTML)..." + find _site -name "*.docx" -type f -delete + echo " ✅ DOCX files removed to save space" +else + echo "💾 Keeping DOCX files for download/access" +fi # Revert the correct sitemap cp _site/sitemap.xml.bkp _site/sitemap.xml rm -f _site/sitemap.xml.bkp -echo "📄 Converting .docx files to .pdf..." -#chmod +x ./convert_docx_to_pdf.sh -timeout 3s .github/scripts/convert_docx_to_pdf.sh || true -timeout 10m .github/scripts/convert_docx_to_pdf.sh +# Remove non-browsable links from sitemap.xml +python3 ../.github/scripts/remove_non_browsable_from_sitemap.py _site/sitemap.xml echo "🧹 Cleaning up..." -find _site -type f -name '*.docx' -delete +find _site -type f -name '*.qmd' -delete + +cp ../404.html _site/404.html +cp ../redirect_map.json _site/redirect_map.json +cp ../url_mapping.json _site/url_mapping.json + -echo "✅ Docs built successfully" +echo "✅ Docs built successfully" \ No newline at end of file diff --git a/.github/scripts/convert_docx_to_pdf.sh b/.github/scripts/convert_docx_to_pdf.sh index 8c25aa71..7279b378 100755 --- a/.github/scripts/convert_docx_to_pdf.sh +++ b/.github/scripts/convert_docx_to_pdf.sh @@ -2,7 +2,7 @@ #set -euo pipefail -RENDERED_DOCS_DIR="../../_site" +RENDERED_DOCS_DIR="../../DOCS/_site" # Set the working directory to the script's location cd "$(dirname "${BASH_SOURCE[0]}")" || exit diff --git a/.github/scripts/external_publish_before.sh b/.github/scripts/external_publish_before.sh index 3efec34c..2f4c512b 100755 --- a/.github/scripts/external_publish_before.sh +++ b/.github/scripts/external_publish_before.sh @@ -18,5 +18,6 @@ run_git "Configuring Git user name" config --global user.name "ci_docker_builder run_git "Configuring Git user email" config --global user.email "ci_docker_builder@users.noreply.github.com" run_git "Setting remote URL" remote set-url origin git@github.com:${GITHUB_REPOSITORY}.git +#run_git "Fetching origin develop branch" fetch origin develop echo "✅ Environment ready." diff --git a/.github/scripts/external_publish_main.sh b/.github/scripts/external_publish_main.sh index 6bfbc7f1..9927b6fe 100755 --- a/.github/scripts/external_publish_main.sh +++ b/.github/scripts/external_publish_main.sh @@ -3,77 +3,106 @@ set -euo pipefail source "$(dirname "$0")/helpers/git-utils.sh" - -echo "🚀 Starting validation..." PROJECT_NAME=$(echo "$PUBLISH_BRANCH" | sed -E 's/^publish-(.+)-[0-9]{8}-[0-9]{6}$/\1/') -echo "Project: $PROJECT_NAME" - -echo "🔎 Checking files inside publish branch (fast tree scan)..." -run_git "fetching develop branch" fetch origin develop PUBLISH_COMMIT=$(git rev-parse origin/${PUBLISH_BRANCH}) -echo "PUBLISH_COMMIT: $PUBLISH_COMMIT" - -git ls-tree -r --name-only "$PUBLISH_COMMIT" > changed-files.txt -sed -i '/^\.gitignore$/d; /^\.github\/workflows\/trigger\.yml$/d' changed-files.txt -cat changed-files.txt - -INVALID_FILES=$(grep -v "^DOCS/${PROJECT_NAME}/" changed-files.txt || true) -if [ -n "$INVALID_FILES" ]; then - echo "❌ Changes outside DOCS/${PROJECT_NAME} detected:" - echo "$INVALID_FILES" - exit 1 -fi - -echo "✅ Folder validation passed." - -echo "🔀 Preparing secure diff-aware merge into develop..." -git checkout develop -git pull origin develop -mkdir -p tmp_publish -git archive "$PUBLISH_COMMIT" DOCS/"$PROJECT_NAME" | tar -x -C tmp_publish - -# ✅ Remove any GitHub workflows injected into the subtree -rm -rf tmp_publish/DOCS/"${PROJECT_NAME}"/.github -rm -rf tmp_publish/DOCS/"${PROJECT_NAME}"/.gitignore - -echo "🔍 Finding modified and deleted files only..." -MODIFIED_FILES=() -while IFS= read -r file; do - if [ ! -f "$file" ] || ! cmp -s "$file" "tmp_publish/$file"; then - MODIFIED_FILES+=("$file") - fi -done < <(find tmp_publish/DOCS/"$PROJECT_NAME" -type f | sed 's|tmp_publish/||') - -DELETED_FILES=() -while IFS= read -r file; do - if [ ! -f "tmp_publish/$file" ]; then - DELETED_FILES+=("$file") - fi -done < <(find DOCS/"$PROJECT_NAME" -type f) - -if [ "${#MODIFIED_FILES[@]}" -eq 0 ] && [ "${#DELETED_FILES[@]}" -eq 0 ]; then - echo "🟡 No real changes to DOCS/$PROJECT_NAME — skipping commit." - exit 0 -fi - -echo "✅ Modified files:" -printf '%s\n' "${MODIFIED_FILES[@]}" -echo "🗑️ Deleted files:" -printf '%s\n' "${DELETED_FILES[@]}" - -for file in "${MODIFIED_FILES[@]}"; do - mkdir -p "$(dirname "$file")" - cp "tmp_publish/$file" "$file"; -done +TEMP_WORKSPACE="tmp_publish" -if [ "${#DELETED_FILES[@]}" -gt 0 ]; then - run_git "removing deleted files" rm "${DELETED_FILES[@]}" -fi -for file in "${MODIFIED_FILES[@]}"; do - run_git "adding $file to staging" add "$file" -done +echo "Project: $PROJECT_NAME" +echo "PUBLISH_COMMIT: $PUBLISH_COMMIT" -COMMIT_MSG=$(cat <| Class Coding / Attribution | +Applicable Products | +|||
|---|---|---|---|---|
| Field | +Description | +Type | +Value(s) | +Status / Change | +
| [FID] | +Dynamic Feature Identifier | +Object ID | +0 to 2.147.483.647 | +Status, Change | +
| [Shape] | +Polygon (default by ESRI) | +Geometry | +Polygon | +Status, Change | +
| [UID] | +Unique Identifier of the feature geometry | +Long, Precision 6 | +1 to 2.147.483.648 | +Status, Change | +
| [DU_ID] | +Unique Identifier of the Riparian Zones delivery unit (DU) | +String, Length 10 | +DU001A to DU043A | +Status, Change | +
| [CODE_1_18] | +Class code of Level 1 for LC/LU 2018 | +Long, Precision 9 | +1 to 10 | +Status, Change | +
| [CODE_2_18] | +Class code of Level 2 for LC/LU 2018 | +Long, Precision 9 | +11 to 100 | +Status, Change | +
| [CODE_3_18] | +Class code of Level 3 for LC/LU 2018 | +Long, Precision 9 | +111 to 1000 | +Status, Change | +
| [CODE_4_18] | +Class code of Level 4 for LC/LU 2018 | +Long, Precision 9 | +1111 to 10000 | +Status, Change | +
| [UA_18] | +Urban Atlas 2018 flag | +String, Length 10 | +UA2018[^4], UA2012, NoData | +Status, Change | +
| [NODATA_18] | +Unclassifiable area due to clouds, shadows, snow, haze, or missing data | +Long, Precision 9 | +0, 1 | +Status, Change | +
| [COMMENT_18] | +Comment field for additional information for LC/LU 2018 | +String, Length 254 | +Standardized text comments, separated by Semicolon | +Status, Change | +
| [CODE_1_12] | +Class code of Level 1 for LC/LU 2012 | +Long, Precision 9 | +1 to 10 | +Status, Change | +
| [CODE_2_12] | +Class code of Level 2 for LC/LU 2012 | +Long, Precision 9 | +11 to 100 | +Status, Change | +
| [CODE_3_12] | +Class code of Level 3 for LC/LU 2012 | +Long, Precision 9 | +111 to 1000 | +Status, Change | +
| [CODE_4_12] | +Class code of Level 4 for LC/LU 2012 | +Long, Precision 9 | +1111 to 10000 | +Status, Change | +
| [UA_12] | +Urban Atlas 2012 flag | +String, Length 10 | +UA2012, NoData | +Status, Change | +
| [NODATA_12] | +Unclassifiable area due to clouds, shadows, snow, haze, or missing data | +Long, Precision 9 | +0, 1 | +Status, Change | +
| [COMMENT_12] | +Comment field for additional information for LC/LU 2012 | +String, Length 254 | +Standardized text comments, separated by Semicolon | +Status, Change | +
| [CHANGECODE] | +Change Class code of Level 4 for LC/LU 2018 & 2012 | +String, Length 11 | +1111_1111 to 10000_10000 | +Change | +
| [AREA_HA] | +Area in hectare [ha] | +Double | +0.001 to 1.8E308 | +Status, Change | +
| Metadata: ISO TC 211 compliant according to INSPIRE metadata standards in XML format | +||||