Skip to content

Commit 99c8a1b

Browse files
authored
Merge pull request #87562 from mramendi/OSDOCS-13186-4.16
OSDOCS 13186 modify build_for_portal.py to detect images - 4.16
2 parents ae55aaf + 42f612e commit 99c8a1b

File tree

1 file changed

+52
-24
lines changed

1 file changed

+52
-24
lines changed

build_for_portal.py

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
cli.init_logging(False, True)
2525

26-
has_errors = False
26+
list_of_errors = []
2727
CLONE_DIR = "."
2828
BASE_PORTAL_URL = "https://docs.redhat.com/en/documentation/"
2929
# ID_RE = re.compile("^\[(?:\[|id=\'|#)(.*?)(\'?,.*?)?(?:\]|\')?\]", re.M | re.DOTALL)
@@ -443,6 +443,7 @@ def reformat_for_drupal(info):
443443

444444
# Reformat the data
445445
for book in books:
446+
446447
log.info("Processing %s", book["Dir"])
447448
book_src_dir = os.path.join(src_dir, book["Dir"])
448449

@@ -454,31 +455,39 @@ def reformat_for_drupal(info):
454455

455456
ensure_directory(images_dir)
456457

458+
# ADDED 21 Jan 2025: selective processing of images
459+
# the set of file names is to be stored in image_files
460+
# The initial value is where images defined in attributes (to copy every time) would be listed; here the set starts empty
461+
image_files = set()
462+
457463
log.debug("Copying source files for " + book["Name"])
458-
copy_files(book, book_src_dir, src_dir, dest_dir, info)
464+
copy_files(book, book_src_dir, src_dir, dest_dir, info, image_files)
459465

460466
log.debug("Copying images for " + book["Name"])
461-
copy_images(book, src_dir, images_dir, distro)
467+
copy_images(book, src_dir, images_dir, distro, image_files)
462468

463469

464-
def copy_images(node, src_path, dest_dir, distro):
470+
471+
def copy_images(node, src_path, dest_dir, distro, image_files):
465472
"""
466473
Copy images over to the destination directory and flatten all image directories into the one top level dir.
467-
"""
468474
469-
def dir_callback(dir_node, parent_dir, depth):
470-
node_dir = os.path.join(parent_dir, dir_node["Dir"])
471-
src = os.path.join(node_dir, "images")
472-
473-
if os.path.exists(src):
474-
src_files = os.listdir(src)
475-
for src_file in src_files:
476-
shutil.copy(os.path.join(src, src_file), dest_dir)
475+
REWORKED 21 Jan 2025: we now assume that there is a single images directory and
476+
that all other images subdirectories are simply symlinks into it. So we do not
477+
iterate over the tree but simply copy the necessary files from that one images directory
478+
"""
477479

478-
iter_tree(node, distro, dir_callback, parent_dir=src_path)
480+
images_source_dir = os.path.join(src_path, "images")
481+
for image_file_name in image_files:
482+
image_file_pathname = os.path.join(images_source_dir,image_file_name)
483+
if os.path.exists(image_file_pathname):
484+
shutil.copy(image_file_pathname, dest_dir)
485+
# if an image file is not found, this is not an error, because it might
486+
# have been picked up from a commented-out line. Actual missing images
487+
# should be caught by the asciidoctor/asciibinder part of CI
479488

480489

481-
def copy_files(node, book_src_dir, src_dir, dest_dir, info):
490+
def copy_files(node, book_src_dir, src_dir, dest_dir, info, image_files):
482491
"""
483492
Recursively copy files from the source directory to the destination directory, making sure to scrub the content, add id's where the
484493
content is referenced elsewhere and fix any links that should be cross references.
@@ -496,7 +505,7 @@ def topic_callback(topic_node, parent_dir, depth):
496505
dest_file = os.path.join(node_dest_dir, topic_node["File"] + ".adoc")
497506

498507
# Copy the file
499-
copy_file(info, book_src_dir, src_file, dest_dir, dest_file)
508+
copy_file(info, book_src_dir, src_file, dest_dir, dest_file, image_files)
500509

501510
iter_tree(node, info["distro"], dir_callback, topic_callback)
502511

@@ -507,6 +516,7 @@ def copy_file(
507516
src_file,
508517
dest_dir,
509518
dest_file,
519+
image_files,
510520
include_check=True,
511521
tag=None,
512522
cwd=None,
@@ -527,7 +537,7 @@ def copy_file(
527537
# os.mknod(dest_file)
528538
open(dest_file, "w").close()
529539
# Scrub/fix the content
530-
content = scrub_file(info, book_src_dir, src_file, tag=tag, cwd=cwd)
540+
content = scrub_file(info, book_src_dir, src_file, image_files, tag=tag, cwd=cwd)
531541

532542
# Check for any includes
533543
if include_check:
@@ -582,6 +592,7 @@ def copy_file(
582592
include_file,
583593
dest_dir,
584594
dest_include_file,
595+
image_files,
585596
tag=include_tag,
586597
cwd=current_dir,
587598
)
@@ -610,8 +621,21 @@ def copy_file(
610621
with open(dest_file, "w") as f:
611622
f.write(content)
612623

624+
def detect_images(content, image_files):
625+
"""
626+
Detects all image file names referenced in the content, which is a readlines() output
627+
Adds the filenames to the image_files set
628+
Does NOT control for false positives such as commented out content,
629+
because "false negatives" are worse
630+
631+
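NOTE: only regular-expression detection is performed here; the pattern matches both block (image::) and inline (image:) macros. The earlier TEMPORARY plan to also run procedural detection and report any misalignment is not implemented in this function.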
632+
"""
633+
image_pattern = re.compile(r'image::?([^\s\[]+)\[.*?\]')
613634

614-
def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None):
635+
for content_str in content:
636+
image_files.update({os.path.basename(f) for f in image_pattern.findall(content_str)})
637+
638+
def scrub_file(info, book_src_dir, src_file, image_files, tag=None, cwd=None):
615639
"""
616640
Scrubs a file and returns the cleaned file contents.
617641
"""
@@ -645,7 +669,7 @@ def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None):
645669
raise ConnectionError("Malformed URL")
646670
except Exception as exception:
647671
log.error("An include file wasn't found: %s", base_src_file)
648-
has_errors = True
672+
list_of_errors.append(f"An include file wasn't found: {base_src_file}")
649673
sys.exit(-1)
650674

651675
# Get a list of predefined custom title ids for the file
@@ -655,6 +679,9 @@ def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None):
655679
with open(src_file, "r") as f:
656680
src_file_content = f.readlines()
657681

682+
# detect image references in the content
683+
detect_images(src_file_content, image_files)
684+
658685
# Scrub the content
659686
content = ""
660687
header_found = content_found = False
@@ -750,7 +777,6 @@ def fix_links(content, info, book_src_dir, src_file, tag=None, cwd=None):
750777
content = _fix_links(
751778
content, book_src_dir, src_file, info, tag=tag, cwd=cwd
752779
)
753-
754780
return content
755781

756782
def dir_to_book_name(dir,src_file,info):
@@ -760,11 +786,11 @@ def dir_to_book_name(dir,src_file,info):
760786
return(book["Name"])
761787
break
762788

763-
has_errors = True
764789
log.error(
765790
'ERROR (%s): book not found for the directory %s',
766791
src_file,
767792
dir)
793+
list_of_errors.append(f"ERROR ({src_file}): book not found for the directory {dir}")
768794
return(dir)
769795

770796

@@ -809,6 +835,7 @@ def _fix_links(content, book_dir, src_file, info, tag=None, cwd=None):
809835
'ERROR (%s): link pointing outside source directory? %s',
810836
src_file,
811837
link_file)
838+
list_of_errors.append(f'ERROR ({src_file}): link pointing outside source directory? {link_file}')
812839
continue
813840
split_relative_path = full_relative_path.split("/")
814841
book_dir_name = split_relative_path[0]
@@ -841,13 +868,14 @@ def _fix_links(content, book_dir, src_file, info, tag=None, cwd=None):
841868
fixed_link = link_text
842869
if EXTERNAL_LINK_RE.search(link_file) is not None:
843870
rel_src_file = src_file.replace(os.path.dirname(book_dir) + "/", "")
844-
has_errors = True
871+
link_text_message = link_text.replace("\n", "")
845872
log.error(
846873
'ERROR (%s): "%s" appears to try to reference a file not included in the "%s" distro',
847874
rel_src_file,
848-
link_text.replace("\n", ""),
875+
link_text_message,
849876
info["distro"],
850877
)
878+
list_of_errors.append(f'ERROR ({rel_src_file}): {link_text_message} appears to try to reference a file not included in the {info["distro"]} distro')
851879
else:
852880
fixed_link = "xref:" + link_anchor.replace("#", "") + link_title
853881

@@ -1195,7 +1223,7 @@ def main():
11951223
# Copy the original data and reformat for drupal
11961224
reformat_for_drupal(info)
11971225

1198-
if has_errors:
1226+
if list_of_errors:
11991227
sys.exit(1)
12001228

12011229
if args.push:

0 commit comments

Comments
 (0)