|
1 | | -# ZERO-REGEX VERSION — preserves indentation for merged comment lines |
| 1 | +# Merge Rust comment lines while preserving: |
| 2 | +# - original spacing after //, ///, //! (taken from the first line of each merged run)
| 3 | +# - empty lines |
| 4 | +# - === headings |
| 5 | +# - bullets / numbered lists |
| 6 | +# - fenced code blocks |
| 7 | +# - Markdown headings |
2 | 8 |
|
3 | 9 | function ltrim(s) { |
4 | | - while (substr(s,1,1) == " " || substr(s,1,1) == "\t") |
5 | | - s = substr(s,2) |
| 10 | + while (substr(s,1,1)==" " || substr(s,1,1)=="\t") s=substr(s,2) |
6 | 11 | return s |
7 | 12 | } |
8 | 13 |
|
9 | | -function leading_spaces(s, i, count, c) { |
10 | | - count=0 |
11 | | - for (i=1; i<=length(s); i++) { |
12 | | - c=substr(s,i,1) |
13 | | - if (c==" " || c=="\t") count++ |
14 | | - else break |
15 | | - } |
16 | | - return count |
17 | | -} |
18 | | - |
19 | | -function is_numbered_list(s, i, c) { |
| 14 | +function is_numbered_list(s, i,c) { |
20 | 15 | found_digit=0 |
21 | | - for (i=1; i<=length(s); i++) { |
| 16 | + for(i=1;i<=length(s);i++){ |
22 | 17 | c=substr(s,i,1) |
23 | | - if (c >= "0" && c <= "9") { found_digit=1; continue } |
24 | | - if (found_digit && (c=="." || c==")")) return 1 |
| 18 | + if(c>="0" && c<="9"){found_digit=1; continue} |
| 19 | + if(found_digit && (c=="."||c==")")) return 1 |
25 | 20 | return 0 |
26 | 21 | } |
27 | 22 | return 0 |
28 | 23 | } |
29 | 24 |
|
30 | 25 | { |
31 | | - line = $0 |
| 26 | + line=$0 |
32 | 27 |
|
33 | | - # ---------- Detect comment type ---------- |
34 | | - if (substr(line,1,3) == "///") |
35 | | - type = "doc" |
36 | | - else if (substr(line,1,3) == "//!") |
37 | | - type = "innerdoc" |
38 | | - else if (substr(line,1,2) == "//") |
39 | | - type = "line" |
40 | | - else |
41 | | - type = "none" |
| 28 | + # Detect comment type |
| 29 | + if(substr(line,1,3)=="///") type="doc" |
| 30 | + else if(substr(line,1,3)=="//!") type="innerdoc" |
| 31 | + else if(substr(line,1,2)=="//") type="line" |
| 32 | + else type="none" |
42 | 33 |
|
43 | | - # ---------- Handle comment lines ---------- |
44 | | - if (type != "none") { |
| 34 | + if(type!="none"){ |
| 35 | + if(type=="doc") prefix="///" |
| 36 | + else if(type=="innerdoc") prefix="//!" |
| 37 | + else prefix="//" |
45 | 38 |
|
46 | | - prefix = (type=="doc" ? "///" : type=="innerdoc" ? "//!" : "//") |
47 | | - raw = substr(line, length(prefix)+1) |
48 | | - trimmed = ltrim(raw) |
| 39 | + raw=substr(line,length(prefix)+1) |
| 40 | + # Capture the original whitespace (spaces and tabs) after the prefix
| 41 | + match_space="" |
| 42 | + i=1 |
| 43 | + while(i<=length(raw) && (substr(raw,i,1)==" " || substr(raw,i,1)=="\t")){ match_space = match_space substr(raw,i,1); i++ } |
| 44 | + text=substr(raw,i) |
49 | 45 |
|
50 | | - # Track indentation |
51 | | - indent = leading_spaces(raw) |
| 46 | + trimmed=ltrim(raw) |
52 | 47 |
|
53 | 48 | # ---------- Fenced code block ---------- |
54 | | - if (substr(trimmed,1,3) == "```") { |
55 | | - if (in_comment) { print out_prefix merged; in_comment=0 } |
56 | | - if (fenced==0) fenced=1; else fenced=0 |
| 49 | + if(substr(trimmed,1,3)=="```"){ |
| 50 | + if(in_comment){ print out_prefix merged; in_comment=0; out_space="" } |
| 51 | + if(fenced==0) fenced=1; else fenced=0 |
57 | 52 | print line |
58 | 53 | next |
59 | 54 | } |
60 | | - if (fenced==1) { print line; next } |
| 55 | + if(fenced==1){ print line; next } |
61 | 56 |
|
62 | | - # ---------- Empty comment line ---------- |
| 57 | + # ---------- Empty comment line (only whitespace after the prefix) ----------
63 | 58 | empty=1 |
64 | | - for (i=1; i<=length(trimmed); i++) { |
65 | | - c=substr(trimmed,i,1) |
66 | | - if (c!=" " && c!="\t") { empty=0; break } |
67 | | - } |
68 | | - if (empty) { if (in_comment) { print out_prefix merged; in_comment=0 } print line; next } |
| 59 | + for(j=1;j<=length(trimmed);j++){c=substr(trimmed,j,1); if(c!=" " && c!="\t"){empty=0; break}} |
| 60 | + if(empty){ if(in_comment){print out_prefix merged; in_comment=0; out_space=""} print line; next } |
69 | 61 |
|
70 | | - # ---------- === section line ---------- |
71 | | - if (substr(trimmed,1,3) == "===") { if (in_comment) { print out_prefix merged; in_comment=0 } print line; next } |
| 62 | + # ---------- === heading ---------- |
| 63 | + if(substr(trimmed,1,3)=="==="){ if(in_comment){print out_prefix merged; in_comment=0; out_space=""} print line; next } |
72 | 64 |
|
73 | 65 | # ---------- Markdown heading ---------- |
74 | | - if (substr(trimmed,1,1) == "#") { if (in_comment) { print out_prefix merged; in_comment=0 } print line; next } |
| 66 | + if(substr(trimmed,1,1)=="#"){ if(in_comment){print out_prefix merged; in_comment=0; out_space=""} print line; next } |
75 | 67 |
|
76 | 68 | # ---------- Bullet or numbered list ---------- |
77 | | - first = substr(trimmed,1,1) |
78 | | - if (first=="*" || first=="-" || is_numbered_list(trimmed)) { if (in_comment) { print out_prefix merged; in_comment=0 } print line; next } |
| 69 | + first=substr(trimmed,1,1) |
| 70 | + if(first=="*" || first=="-" || is_numbered_list(trimmed)){ if(in_comment){print out_prefix merged; in_comment=0; out_space=""} print line; next } |
79 | 71 |
|
80 | 72 | # ---------- Mergeable line ---------- |
81 | | - if (in_comment && type==last_type) { |
82 | | - merged = merged " " ltrim(raw) # preserve relative spacing inside line |
83 | | - if (indent < out_indent) out_indent = indent |
| 73 | + if(in_comment && type==last_type){ |
| 74 | + merged=merged " " text |
84 | 75 | } else { |
85 | | - if (in_comment) print out_prefix merged |
| 76 | + if(in_comment) print out_prefix merged |
86 | 77 | in_comment=1 |
87 | 78 | last_type=type |
88 | | - merged = ltrim(raw) |
89 | | - out_prefix = prefix |
90 | | - out_indent = indent |
| 79 | + merged=text |
| 80 | + out_prefix = prefix match_space |
91 | 81 | } |
92 | 82 |
|
93 | 83 | next |
94 | 84 | } |
95 | 85 |
|
96 | | - # ---------- Non-comment line ---------- |
97 | | - if (in_comment) { |
98 | | - # Apply preserved indentation |
99 | | - printf "%s%*s%s\n", out_prefix, out_indent, "", merged |
100 | | - in_comment=0 |
101 | | - } |
| 86 | + # Non-comment line |
| 87 | + if(in_comment){ print out_prefix merged; in_comment=0; out_space="" } |
102 | 88 | print line |
103 | 89 | } |
104 | 90 |
|
105 | | -END { |
106 | | - if (in_comment) printf "%s%*s%s\n", out_prefix, out_indent, "", merged |
107 | | -} |
| 91 | +END { if(in_comment) print out_prefix merged } |
108 | 92 |
|
0 commit comments