23
23
24
24
cli .init_logging (False , True )
25
25
26
- has_errors = False
26
+ list_of_errors = []
27
27
CLONE_DIR = "."
28
28
BASE_PORTAL_URL = "https://docs.redhat.com/en/documentation/"
29
29
# ID_RE = re.compile("^\[(?:\[|id=\'|#)(.*?)(\'?,.*?)?(?:\]|\')?\]", re.M | re.DOTALL)
@@ -443,6 +443,7 @@ def reformat_for_drupal(info):
443
443
444
444
# Reformat the data
445
445
for book in books :
446
+
446
447
log .info ("Processing %s" , book ["Dir" ])
447
448
book_src_dir = os .path .join (src_dir , book ["Dir" ])
448
449
@@ -454,31 +455,39 @@ def reformat_for_drupal(info):
454
455
455
456
ensure_directory (images_dir )
456
457
458
+ # ADDED 21 Jan 2025: selective processing of images
459
+ # the set of file names is to be stored in image_files
460
+ # It currently starts empty; images defined in attributes (to copy every time) would have to be added to this initial value
461
+ image_files = set ()
462
+
457
463
log .debug ("Copying source files for " + book ["Name" ])
458
- copy_files (book , book_src_dir , src_dir , dest_dir , info )
464
+ copy_files (book , book_src_dir , src_dir , dest_dir , info , image_files )
459
465
460
466
log .debug ("Copying images for " + book ["Name" ])
461
- copy_images (book , src_dir , images_dir , distro )
467
+ copy_images (book , src_dir , images_dir , distro , image_files )
462
468
463
469
464
- def copy_images (node , src_path , dest_dir , distro ):
470
+
471
def copy_images(node, src_path, dest_dir, distro, image_files):
    """
    Copy the referenced images over to the destination directory.

    REWORKED 21 Jan 2025: we now assume that there is a single images directory and
    that all other images subdirectories are simply symlinks into it. So we do not
    iterate over the tree but simply copy the necessary files from that one images directory.

    node        -- not used by this implementation; kept so existing callers keep working
    src_path    -- directory that contains the "images" directory
    dest_dir    -- directory the image files are copied into
    distro      -- not used by this implementation; kept so existing callers keep working
    image_files -- iterable of image file names to look up in the images directory
    """
    images_source_dir = os.path.join(src_path, "images")
    for image_file_name in image_files:
        image_file_pathname = os.path.join(images_source_dir, image_file_name)
        # Use isfile() rather than exists(): an empty name resolves to the images
        # directory itself, which exists but cannot be copied with shutil.copy.
        if os.path.isfile(image_file_pathname):
            shutil.copy(image_file_pathname, dest_dir)
        # if an image file is not found, this is not an error, because it might
        # have been picked up from a commented-out line. Actual missing images
        # should be caught by the asciidoctor/asciibinder part of CI
479
488
480
489
481
- def copy_files (node , book_src_dir , src_dir , dest_dir , info ):
490
+ def copy_files (node , book_src_dir , src_dir , dest_dir , info , image_files ):
482
491
"""
483
492
Recursively copy files from the source directory to the destination directory, making sure to scrub the content, add id's where the
484
493
content is referenced elsewhere and fix any links that should be cross references.
@@ -496,7 +505,7 @@ def topic_callback(topic_node, parent_dir, depth):
496
505
dest_file = os .path .join (node_dest_dir , topic_node ["File" ] + ".adoc" )
497
506
498
507
# Copy the file
499
- copy_file (info , book_src_dir , src_file , dest_dir , dest_file )
508
+ copy_file (info , book_src_dir , src_file , dest_dir , dest_file , image_files )
500
509
501
510
iter_tree (node , info ["distro" ], dir_callback , topic_callback )
502
511
@@ -507,6 +516,7 @@ def copy_file(
507
516
src_file ,
508
517
dest_dir ,
509
518
dest_file ,
519
+ image_files ,
510
520
include_check = True ,
511
521
tag = None ,
512
522
cwd = None ,
@@ -527,7 +537,7 @@ def copy_file(
527
537
# os.mknod(dest_file)
528
538
open (dest_file , "w" ).close ()
529
539
# Scrub/fix the content
530
- content = scrub_file (info , book_src_dir , src_file , tag = tag , cwd = cwd )
540
+ content = scrub_file (info , book_src_dir , src_file , image_files , tag = tag , cwd = cwd )
531
541
532
542
# Check for any includes
533
543
if include_check :
@@ -582,6 +592,7 @@ def copy_file(
582
592
include_file ,
583
593
dest_dir ,
584
594
dest_include_file ,
595
+ image_files ,
585
596
tag = include_tag ,
586
597
cwd = current_dir ,
587
598
)
@@ -610,8 +621,21 @@ def copy_file(
610
621
with open (dest_file , "w" ) as f :
611
622
f .write (content )
612
623
624
def detect_images(content, image_files):
    """
    Detects all image file names referenced in the content, which is a readlines() output
    (an iterable of strings), and adds the file names to the image_files set.

    Both the block form (image::target[...]) and the inline form (image:target[...]) are
    matched with a regular expression; only the base name of the target is stored.
    Does NOT control for false positives such as commented out content,
    because "false negatives" are worse.
    Targets that end in "/" have no base name and are skipped.
    """
    image_pattern = re.compile(r'image::?([^\s\[]+)\[.*?\]')

    for content_str in content:
        for target in image_pattern.findall(content_str):
            file_name = os.path.basename(target)
            # A target such as "dir/" yields an empty base name; storing it would
            # make later path joins point at a directory instead of a file.
            if file_name:
                image_files.add(file_name)
637
+
638
+ def scrub_file (info , book_src_dir , src_file , image_files , tag = None , cwd = None ):
615
639
"""
616
640
Scrubs a file and returns the cleaned file contents.
617
641
"""
@@ -645,7 +669,7 @@ def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None):
645
669
raise ConnectionError ("Malformed URL" )
646
670
except Exception as exception :
647
671
log .error ("An include file wasn't found: %s" , base_src_file )
648
- has_errors = True
672
+ list_of_errors . append ( f"An include file wasn't found: { base_src_file } " )
649
673
sys .exit (- 1 )
650
674
651
675
# Get a list of predefined custom title ids for the file
@@ -655,6 +679,9 @@ def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None):
655
679
with open (src_file , "r" ) as f :
656
680
src_file_content = f .readlines ()
657
681
682
+ # detect image references in the content
683
+ detect_images (src_file_content , image_files )
684
+
658
685
# Scrub the content
659
686
content = ""
660
687
header_found = content_found = False
@@ -750,7 +777,6 @@ def fix_links(content, info, book_src_dir, src_file, tag=None, cwd=None):
750
777
content = _fix_links (
751
778
content , book_src_dir , src_file , info , tag = tag , cwd = cwd
752
779
)
753
-
754
780
return content
755
781
756
782
def dir_to_book_name (dir ,src_file ,info ):
@@ -760,11 +786,11 @@ def dir_to_book_name(dir,src_file,info):
760
786
return (book ["Name" ])
761
787
break
762
788
763
- has_errors = True
764
789
log .error (
765
790
'ERROR (%s): book not found for the directory %s' ,
766
791
src_file ,
767
792
dir )
793
+ list_of_errors .append (f"ERROR ({ src_file } ): book not found for the directory { dir } " )
768
794
return (dir )
769
795
770
796
@@ -809,6 +835,7 @@ def _fix_links(content, book_dir, src_file, info, tag=None, cwd=None):
809
835
'ERROR (%s): link pointing outside source directory? %s' ,
810
836
src_file ,
811
837
link_file )
838
+ list_of_errors .append (f'ERROR ({ src_file } ): link pointing outside source directory? { link_file } ' )
812
839
continue
813
840
split_relative_path = full_relative_path .split ("/" )
814
841
book_dir_name = split_relative_path [0 ]
@@ -841,13 +868,14 @@ def _fix_links(content, book_dir, src_file, info, tag=None, cwd=None):
841
868
fixed_link = link_text
842
869
if EXTERNAL_LINK_RE .search (link_file ) is not None :
843
870
rel_src_file = src_file .replace (os .path .dirname (book_dir ) + "/" , "" )
844
- has_errors = True
871
+ link_text_message = link_text . replace ( " \n " , "" )
845
872
log .error (
846
873
'ERROR (%s): "%s" appears to try to reference a file not included in the "%s" distro' ,
847
874
rel_src_file ,
848
- link_text . replace ( " \n " , "" ) ,
875
+ link_text_message ,
849
876
info ["distro" ],
850
877
)
878
+ list_of_errors .append (f'ERROR ({ rel_src_file } )): { link_text_message } appears to try to reference a file not included in the { info ["distro" ]} distro' )
851
879
else :
852
880
fixed_link = "xref:" + link_anchor .replace ("#" , "" ) + link_title
853
881
@@ -1195,7 +1223,7 @@ def main():
1195
1223
# Copy the original data and reformat for drupal
1196
1224
reformat_for_drupal (info )
1197
1225
1198
- if has_errors :
1226
+ if list_of_errors :
1199
1227
sys .exit (1 )
1200
1228
1201
1229
if args .push :
0 commit comments