Skip to content

Commit f928781

Browse files
committed
Maintenance: Completed adaption of lapidify PDF tool
- appendices are correctly labeled (in table of contents and in the main text)
- added new page commands before level one appendix headings
- minimized the appendix patch structure by enhancing the logic in the post-processing

Signed-off-by: Stefan Hagen <stefan@hagen.link>
1 parent 7fad1d7 commit f928781

File tree

1 file changed

+88
-56
lines changed

1 file changed

+88
-56
lines changed

csaf_2.1/prose/edit/bin/lapidify.py

Lines changed: 88 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -123,20 +123,24 @@
123123
META_TOC_TYPE = dict[str, dict[str, Union[bool, str, list[dict[str, str]]]]]
124124

125125
APPENDIX_HEAD_REMAP = {
126-
'# Acknowledgments': {'replace': ['# ', '# Appendix A. '], 'attrs': '{.unnumbered #acknowledgments}'},
127-
'# Revision History': {'replace': ['# ', '# Appendix B. '], 'attrs': '{.unnumbered #revision-history}'},
128-
'# Guidance on the Size of CSAF Documents': {
129-
'replace': ['# ', '# Appendix C. '],
126+
'# Appendix A. Acknowledgments': {'prepend': [r'\newpage', ''], 'attrs': '{.unnumbered #acknowledgments}'},
127+
'# Appendix B. Revision History': {'prepend': [r'\newpage', ''], 'attrs': '{.unnumbered #revision-history}'},
128+
'# Appendix C. Guidance on the Size of CSAF Documents': {
129+
'prepend': [r'\newpage', ''],
130130
'attrs': '{.unnumbered #guidance-on-the-size-of-csaf-documents}',
131131
},
132-
'## File Size': {'replace': ['## ', '## C.1 '], 'attrs': '{.unnumbered #file-size}'},
133-
'## Array Length': {'replace': ['## ', '## C.2 '], 'attrs': '{.unnumbered #array-length}'},
134-
'## String Length': {'replace': ['## ', '## C.3 '], 'attrs': '{.unnumbered #string-length}'},
135-
'## Date': {'replace': ['## ', '## C.4 '], 'attrs': '{.unnumbered #date}'},
136-
'## Enum': {'replace': ['## ', '## C.5 '], 'attrs': '{.unnumbered #enum}'},
137-
'## URI Length': {'replace': ['## ', '## C.6 '], 'attrs': '{.unnumbered #uri-length}'},
138-
'## UUID Length': {'replace': ['## ', '## C.7 '], 'attrs': '{.unnumbered #uuid-length}'},
139-
'# Collapsing Product Paths': {'replace': ['# ', '# Appendix D. '], 'attrs': '{.unnumbered #collapsing-product-paths}'},
132+
'## C.1 File Size': {'attrs': '{.unnumbered #file-size}'},
133+
'## C.2 Array Length': {'attrs': '{.unnumbered #array-length}'},
134+
'## C.3 String Length': {'attrs': '{.unnumbered #string-length}'},
135+
'## C.4 Date': {'attrs': '{.unnumbered #date}'},
136+
'## C.5 Enum': {'attrs': '{.unnumbered #enum}'},
137+
'## C.6 URI Length': {'attrs': '{.unnumbered #uri-length}'},
138+
'## C.7 UUID Length': {'attrs': '{.unnumbered #uuid-length}'},
139+
'# Appendix D. Collapsing Product Paths{#collapsing-product-paths}{#collapsing-product-paths}': {
140+
'prepend': [r'\newpage', ''],
141+
'replace': ['{#collapsing-product-paths}', ''],
142+
'attrs': '{.unnumbered #collapsing-product-paths}'
143+
},
140144
}
141145

142146

@@ -501,6 +505,12 @@ def main(args: list[str]) -> int:
501505
current_cs = None
502506
cs_of_slot: list[Union[str, None]] = [None for _ in lines]
503507
in_fenced_block = False
508+
509+
db = []
510+
is_appendix = False
511+
root: int = 0
512+
appr = ''
513+
504514
for slot, line in enumerate(lines):
505515
if line.startswith(FENCED_BLOCK_FLIP_FLOP):
506516
in_fenced_block = not in_fenced_block
@@ -510,49 +520,59 @@ def main(args: list[str]) -> int:
510520

511521
if meta_hooks.get(slot) is not None:
512522
meta_hook = meta_hooks[slot]
513-
is_plain = True # No special meta data needed
514523
if line.startswith(CLEAN_MD_START):
515524
clean_headings = True
516525
cs_of_slot[slot] = current_cs
517526
for tag in sec_cnt:
518527
if line.startswith(tag) and clean_headings and not in_fenced_block:
519528
# manage counter
520529
if not meta_hook:
521-
# auto counters
522-
is_plain = True
523-
nxt_lvl = sec_lvl[tag]
524-
sec_cnt[tag] += 1
525-
if nxt_lvl < cur_lvl:
526-
for level in range(nxt_lvl + 1, lvl_sup):
527-
sec_cnt[lvl_sec[level]] = 0
528-
sec_cnt_disp_vec = []
529-
for s_tag, cnt in sec_cnt.items():
530-
if cnt == 0:
531-
raise RuntimeError(f'counting is hard: {sec_cnt} at {tag} for {slot}:{line.rstrip(NL)}')
532-
sec_cnt_disp_vec.append(str(cnt))
533-
if s_tag == tag:
534-
break
535-
sec_cnt_disp = FULL_STOP.join(sec_cnt_disp_vec)
536-
# Hack to amend first level numeric section counter displays with a full stop - do not ask ...
537-
if FULL_STOP not in sec_cnt_disp:
538-
sec_cnt_disp += FULL_STOP
530+
display = ''
531+
level = len(line.split(SPACE, 1)[0])
532+
if level == 1:
533+
root += 1
534+
text_plus = line[level + 1:].rstrip()
535+
if text_plus.startswith('Appendix '):
536+
appr = text_plus.replace('Appendix ', '')[0]
537+
display = f'Appendix {appr}.'
538+
text_plus = text_plus.replace(f'{display} ', '')
539+
is_appendix = True
540+
else:
541+
match = APPENDIX_INNER_PATTERN.match(text_plus)
542+
if match:
543+
found = match.groupdict()
544+
display = found['display']
545+
text_plus = text_plus.replace(f'{display} ', '')
546+
if TOK_LAB in text_plus:
547+
text, slug = text_plus.rstrip(SPACE).rstrip('}').split(TOK_LAB, 1)
548+
else:
549+
text = text_plus.rstrip(SPACE)
550+
slug = slugify(text)
551+
if not is_appendix:
552+
a_root = str(root)
553+
else:
554+
a_root = appr
555+
556+
if not is_appendix:
557+
tag = f'{HASH * level} '
558+
nxt_lvl = sec_lvl[tag]
559+
sec_cnt[tag] += 1
560+
if nxt_lvl < cur_lvl:
561+
for lvl in range(nxt_lvl + 1, lvl_sup):
562+
sec_cnt[lvl_sec[lvl]] = 0
563+
sec_cnt_disp_vec = []
564+
for s_tag, cnt in sec_cnt.items():
565+
if cnt == 0:
566+
raise RuntimeError(f'ERROR: Counting is hard: {sec_cnt} at {tag} for {text}')
567+
sec_cnt_disp_vec.append(str(cnt))
568+
if s_tag == tag:
569+
break
570+
sec_cnt_disp = FULL_STOP.join(sec_cnt_disp_vec)
571+
display = sec_cnt_disp.rstrip(DOT)
572+
db.append([is_appendix, a_root, level, display, text, slug])
539573
else:
540-
# pull in counters from meta
541-
is_plain = False
542-
app_lvl = 1 # belt and braces ...
543-
text = line.split(tag, 1)[1].rstrip()
544-
if TOK_LAB in text:
545-
# special label
546-
label = text.split(TOK_LAB, 1)[1].rstrip(CB_END)
547-
text = text.split(TOK_LAB, 1)[0]
548-
if text == meta_hook[TOC][LABEL]:
549-
sec_cnt_disp = meta_hook[TOC][ENUMERATE] # type: ignore
550-
app_lvl = 1
551-
elif meta_hook[TOC].get(CHILDREN):
552-
for cand in meta_hook[TOC][CHILDREN]: # type: ignore
553-
if text == cand[LABEL]: # type: ignore
554-
sec_cnt_disp = cand[ENUMERATE] # type: ignore
555-
app_lvl = 2
574+
print('WARNING: deprecated out-of-band appendix handling triggered in manage-counter')
575+
return 1
556576

557577
# manage label
558578
text = line.split(tag, 1)[1].rstrip()
@@ -565,9 +585,10 @@ def main(args: list[str]) -> int:
565585
# reduced_text = text.split(TOK_LAB, 1)[0]
566586
else:
567587
label = slugify(text)
568-
clean_sec_cnt_disp = (f'{sec_cnt_disp}' if is_plain else sec_cnt_disp).rstrip(FULL_STOP)
588+
clean_sec_cnt_disp = (f'{sec_cnt_disp}' if is_appendix else sec_cnt_disp).rstrip(FULL_STOP)
569589
SEC_LABEL_TEXT[label] = clean_sec_cnt_disp
570590
SECTION_DISPLAY_TO_LABEL[clean_sec_cnt_disp] = label
591+
# line = tag + text + ' ' + TOK_SEC.replace('$thing$', label)
571592
# MAYBE_NO_HTML_A_FOR_HEADING #
572593
line = tag + text + link_attributes # + ' ' + TOK_SEC.replace('$thing$', label)
573594
# MAYBE_FIND_THE_APPENDIX_UNDO_BUG_WILL_YOU_?
@@ -576,20 +597,26 @@ def main(args: list[str]) -> int:
576597
terse_line = line.rstrip()
577598
if terse_line in APPENDIX_HEAD_REMAP:
578599
transform = APPENDIX_HEAD_REMAP[terse_line]
579-
this, that = transform['replace']
580-
line = terse_line.replace(this, that) + transform['attrs'] + NL # type: ignore
600+
if 'prepend' in transform:
601+
lines[slot - 1] = lines[slot - 1] + NL + NL.join(transform['prepend'])
602+
if 'replace' in transform:
603+
this, that = transform['replace']
604+
terse_line = terse_line.replace(this, that)
605+
line = terse_line + transform['attrs'] + NL # type: ignore
581606

582607
# MAYBE_NO_SECTION_NUMBERS_AS_PART_OF_HEADING # line = line.replace(tag, f'{tag}{sec_cnt_disp} ', 1) + NL
583608
cur_lvl = nxt_lvl
584-
if not did_appendix_sep and meta_hook and slot < first_meta_slot: # type: ignore
609+
if not did_appendix_sep and not is_appendix: # meta_hook and slot < first_meta_slot: # type: ignore
585610
tic_toc.append(TOC_VERTICAL_SPACER)
586611
did_appendix_sep = True
587-
toc_template = TOC_TEMPLATE[cur_lvl if not meta_hook else app_lvl]
612+
toc_template = TOC_TEMPLATE[cur_lvl if not is_appendix else level] # meta_hook else app_lvl]
588613
extended = 0
589-
if sec_cnt_disp.upper().isupper():
614+
if is_appendix: # sec_cnt_disp.upper().isupper(): # at least one lettersec_cnt_disp.upper().isupper():
590615
extended = 2 if set(sec_cnt_disp).intersection('0123456789') else 1
616+
DEBUG and print(f'DEBUG: appendixer-main at {slot=} and {extended=} on line {line.rstrip()}')
591617
if extended == 2:
592618
extended = sec_cnt_disp.count(DOT) + 1
619+
DEBUG and print(f'DEBUG: - appendixer-indent at {slot=} and {extended=} with {sec_cnt_disp}')
593620
if '{#' in text and label in text:
594621
debug and print(f'{slot=}: Fixed ToC for {line=}')
595622
debug and print(
@@ -617,6 +644,13 @@ def main(args: list[str]) -> int:
617644
if line.startswith(tag) and not clean_headings:
618645
lines[slot] = line.rstrip() + SEC_NO_TOC_POSTFIX + NL
619646

647+
if DEBUG:
648+
for is_appendix, a_root, level, display, text, slug in db: # type: ignore
649+
print(
650+
f'{" " if not is_appendix else "APPENDIX"} | {a_root} |'
651+
f' {(HASH * level).rjust(7)} "{text}" <-- {slug}'
652+
)
653+
620654
# Process the text display of citation refs
621655
for slot, line in enumerate(lines):
622656
completed = insert_any_citation(line)
@@ -773,9 +807,7 @@ def main(args: list[str]) -> int:
773807
if DUMP_LUT:
774808
with SECTION_DISPLAY_TO_LABEL_AT.open('wt', encoding=ENCODING, errors=ENC_ERRS) as handle:
775809
json.dump(SECTION_DISPLAY_TO_LABEL, handle, indent=2)
776-
section_label_to_display = {
777-
label: disp for label, disp in sorted((label, disp) for disp, label in SECTION_DISPLAY_TO_LABEL.items())
778-
}
810+
section_label_to_display = dict(sorted(((label, disp) for (disp, label) in SECTION_DISPLAY_TO_LABEL.items())))
779811
with SECTION_LABEL_TO_DISPLAY_AT.open('wt', encoding=ENCODING, errors=ENC_ERRS) as handle:
780812
json.dump(section_label_to_display, handle, indent=2)
781813

0 commit comments

Comments
 (0)