Skip to content

Commit f7163e3

Browse files
author
H. Peter Anvin
committed
doc: allow replicated index entries (\IR), make index sorting smarter
Allow a single index entry key to be defined with \IR more than once, generating multiple entries in the index; this is really useful for example to always generate "macros, single-line" and "single-line macros" entries sorted at different places. Be smarter about sorting the index: sort (nearly) all special characters before alphanumerics, and (attempt to) sort numbers in numerical order rather than alphabetical (so BITS8 sorts before BITS16). Signed-off-by: H. Peter Anvin <[email protected]>
1 parent 7727fbb commit f7163e3

File tree

1 file changed

+77
-55
lines changed

1 file changed

+77
-55
lines changed

doc/rdsrc.pl

Lines changed: 77 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@
157157

158158
# Read the file; pass a paragraph at a time to the paragraph processor.
159159
print "Reading input...";
160-
$pname = "para000000";
160+
$pname = [];
161161
@pnames = @pflags = ();
162162
$para = undef;
163163
foreach $file (@files) {
@@ -268,9 +268,9 @@ sub include {
268268
sub got_para {
269269
local ($_) = @_;
270270
my $pflags = "", $i, $w, $l, $t;
271-
return if !/\S/;
271+
my @para = ();
272272

273-
@$pname = ();
273+
return if !/\S/;
274274

275275
# Replace metadata macros
276276
while (/^(.*)\\m\{([^\}]*)\}(.*)$/) {
@@ -294,7 +294,7 @@ sub got_para {
294294
$l =~ s/\\\{/\{/g;
295295
$l =~ s/\\\}/}/g;
296296
$l =~ s/\\\\/\\/g;
297-
push @$pname, $l;
297+
push @para, $l;
298298
}
299299
$_ = ''; # suppress word-by-word code
300300
} elsif (/^\\C/) {
@@ -389,11 +389,11 @@ sub got_para {
389389
$pflags = "norm";
390390
}
391391

392-
# The word-by-word code: unless @$pname is already defined (which it
392+
# The word-by-word code: unless @para is already defined (which it
393393
# will be in the case of a code paragraph), split the paragraph up
394-
# into words and push each on @$pname.
394+
# into words and push each on @para.
395395
#
396-
# Each thing pushed on @$pname should have a two-character type
396+
# Each thing pushed on @para should have a two-character type
397397
# code followed by the text.
398398
#
399399
# Type codes are:
@@ -416,7 +416,7 @@ sub got_para {
416416
# index-items arrays
417417
# "sp" for space
418418
while (/\S/) {
419-
s/^\s*//, push @$pname, "sp" if /^\s/;
419+
s/^\s*//, push @para, "sp" if /^\s/;
420420
$indexing = $qindex = 0;
421421
if (/^(\\[iI])?\\c/) {
422422
$qindex = 1 if $1 eq "\\I";
@@ -429,9 +429,8 @@ sub got_para {
429429
$w =~ s/\\\}/\}/g;
430430
$w =~ s/\\-/-/g;
431431
$w =~ s/\\\\/\\/g;
432-
(push @$pname,"i"),$lastp = $#$pname if $indexing;
433-
push @$pname,"c $w" if !$qindex;
434-
$$pname[$lastp] = &addidx($node, $w, "c $w") if $indexing;
432+
push(@para, addidx($node, $w, "c $w")) if ($indexing);
433+
push(@para, "c $w") if (!$qindex);
435434
} elsif (/^\\[iIe]/) {
436435
/^(\\[iI])?(\\e)?/;
437436
$emph = 0;
@@ -448,27 +447,33 @@ sub got_para {
448447
$w =~ s/\\\\/\\/g;
449448
$t = $emph ? "es" : "n ";
450449
@ientry = ();
451-
(push @$pname,"i"),$lastp = $#$pname if $indexing;
450+
@pentry = ();
452451
foreach $i (split /\s+/,$w) { # \e and \i can be multiple words
453-
push @$pname,"$t$i","sp" if !$qindex;
454-
($ii=$i) =~ tr/A-Z/a-z/, push @ientry,"n $ii","sp" if $indexing;
452+
push @pentry, "$t$i","sp";
453+
($ii=$i) =~ tr/A-Z/a-z/, push @ientry,"n $ii","sp";
455454
$t = $emph ? "e " : "n ";
456455
}
457-
$w =~ tr/A-Z/a-z/, pop @ientry if $indexing;
458-
$$pname[$lastp] = &addidx($node, $w, @ientry) if $indexing;
459-
pop @$pname if !$qindex; # remove final space
460-
if (substr($$pname[$#$pname],0,2) eq "es" && !$qindex) {
461-
substr($$pname[$#$pname],0,2) = "eo";
462-
} elsif ($emph && !$qindex) {
463-
substr($$pname[$#$pname],0,2) = "ee";
456+
if ($indexing) {
457+
$w =~ tr/A-Z/a-z/;
458+
pop @ientry; # remove final space
459+
push(@para, addidx($node, $w, @ientry));
460+
}
461+
if (!$qindex) {
462+
pop @pentry; # remove final space
463+
if (substr($pentry[-1],0,2) eq 'es') {
464+
substr($pentry[-1],0,2) = 'eo';
465+
} elsif ($emph) {
466+
substr($pentry[-1],0,2) = 'ee';
467+
}
468+
push(@para, @pentry);
464469
}
465470
} elsif (/^\\[kK]/) {
466471
$t = "k ";
467472
$t = "kK" if /^\\K/;
468473
s/^\\[kK]//;
469474
die "badly formatted \\k: \\k$_\n" if !/\{([^\}]*)\}(.*)$/;
470475
$_ = $2;
471-
push @$pname,"$t$1";
476+
push @para,"$t$1";
472477
} elsif (/^\\W/) {
473478
s/^\\W//;
474479
die "badly formatted \\W: \\W$_\n"
@@ -483,9 +488,8 @@ sub got_para {
483488
$w =~ s/\\\}/\}/g;
484489
$w =~ s/\\-/-/g;
485490
$w =~ s/\\\\/\\/g;
486-
(push @$pname,"i"),$lastp = $#$pname if $indexing;
487-
push @$pname,"$t<$l>$w";
488-
$$pname[$lastp] = &addidx($node, $w, "c $w") if $indexing;
491+
push(@para, addidx($node, $w, "c $w")) if $indexing;
492+
push(@para, "$t<$l>$w");
489493
} else {
490494
die "what the hell? $_\n" if !/^(([^\s\\\-]|\\[\\{}\-])*-?)(.*)$/;
491495
die "painful death! $_\n" if !length $1;
@@ -496,53 +500,71 @@ sub got_para {
496500
$w =~ s/\\-/-/g;
497501
$w =~ s/\\\\/\\/g;
498502
if ($w eq '--') {
499-
push @$pname, 'dm';
503+
push @para, 'dm';
500504
} elsif ($w eq '-') {
501-
push @$pname, 'da';
505+
push @para, 'da';
502506
} else {
503-
push @$pname,"n $w";
507+
push @para,"n $w";
504508
}
505509
}
506510
}
507511
if ($irewrite ne undef) {
508-
&addidx(undef, $irewrite, @$pname);
509-
@$pname = ();
512+
addidx(undef, $irewrite, @para);
510513
} else {
511-
push @pnames, $pname;
514+
push @pnames, [@para];
512515
push @pflags, $pflags;
513-
$pname++;
514516
}
515517
}
516518

517-
sub addidx {
518-
my ($node, $text, @ientry) = @_;
519+
sub addidx($$@) {
520+
my($node, $text, @ientry) = @_;
519521
$text = $idxalias{$text} || $text;
520-
if ($node eq undef || !$idxmap{$text}) {
521-
@$ientry = @ientry;
522-
$idxmap{$text} = $ientry;
523-
$ientry++;
524-
}
525-
if ($node) {
526-
$idxnodes{$node,$text} = 1;
527-
return "i $text";
522+
if (!exists($idxmap{$text})) {
523+
$idxmap{$text} = [@ientry];
524+
$idxdup{$text} = [$text];
525+
} elsif (!defined($node)) {
526+
my $dummy = sprintf('%s #%05d', $text, $#{$idxdup{$text}} + 2);
527+
$idxmap{$dummy} = [@ientry];
528+
push(@{$idxdup{$text}}, $dummy);
528529
}
530+
531+
return undef if (!defined($node));
532+
533+
return map { $idxnodes{$node,$_} = 1; "i $_" } @{$idxdup{$text}};
529534
}
530535

531536
sub indexsort {
532537
my $iitem, $ientry, $i, $piitem, $pcval, $cval, $clrcval;
533538

534539
@itags = map { # get back the original data as the 1st elt of each list
535-
$_->[0]
536-
} sort { # compare auxiliary (non-first) elements of lists
537-
$a->[1] cmp $b->[1] ||
538-
$a->[2] cmp $b->[2] ||
539-
$a->[0] cmp $b->[0]
540-
} map { # transform array into list of 3-element lists
541-
my $ientry = $idxmap{$_};
542-
my $a = substr($$ientry[0],2);
543-
$a =~ tr/A-Za-z0-9//cd;
544-
[$_, uc($a), substr($$ientry[0],0,2)]
545-
} keys %idxmap;
540+
$_->[0]
541+
} sort { # compare auxiliary (non-first) elements of lists
542+
my $d = 0;
543+
for (my $i = 1; defined($a->[$i]) || defined($b->[$i]); $i++) {
544+
$d = $a->[$i] cmp $b->[$i];
545+
last if ($d);
546+
}
547+
$d
548+
} map { # transform array into list of 3-element lists
549+
my $ientry = $idxmap{$_};
550+
my $b = lc(join(' ', map { substr($_,2) } @$ientry));
551+
$b =~ s/([][(){}]+|\B,)//g;
552+
$b =~ s/\s+/ /g;
553+
my $a = $b;
554+
$a =~ s/([[:alpha:]])/Z$1/g;
555+
# From this point on [A-Z] means an already classed character
556+
# Try to sort numbers in numerical order (e.g. 8 before 16)
557+
while ($a =~ /^(|.*?[^A-Z])(\d+)(\.\d+)?(.*)$/) {
558+
my $p = $1; my $s = $4;
559+
my $nn = ('0' x (24 - length($2))) . $2 . $3;
560+
$nn =~ s/(.)/D$1/g;
561+
$a = $p . $nn . $s;
562+
}
563+
$a =~ s/([^A-Z\s])/A$1/g;
564+
my $c = join(' ', map { substr($_,0,2) } @$ientry);
565+
my $v = [$_, $a, $b, $_, $c];
566+
$v
567+
} keys %idxmap;
546568

547569
# Having done that, check for comma-hood.
548570
$cval = 0;
@@ -594,8 +616,8 @@ sub fixup_xrefs {
594616
next if $pflags[$p] eq "code";
595617
$pname = $pnames[$p];
596618
for ($i=$#$pname; $i >= 0; $i--) {
597-
if ($$pname[$i] =~ /^k/) {
598-
$k = $$pname[$i];
619+
$k = $$pname[$i];
620+
if ($k =~ /^k/) {
599621
$caps = ($k =~ /^kK/);
600622
$k = substr($k,2);
601623
$repl = $refs{$k};

0 commit comments

Comments
 (0)