Skip to content

Commit 44a1eae

Browse files
committed
Parse the POD from macro files.
1 parent f70c065 commit 44a1eae

4 files changed

Lines changed: 72 additions & 59 deletions

File tree

bin/dev_scripts/build-search-db.pl

Lines changed: 65 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,10 @@ =head1 SYNOPSIS
2525
2626
=head1 DESCRIPTION
2727
28-
Read through all of the files in $PG_ROOT/tutorial/samples-problems and the POD in the macro files.
28+
This script parses all of the files in $PG_ROOT/tutorial/sample-problems and the POD in the macro files.
2929
The result is a JSON file containing information about every file to be searched for in the sample-problems
30-
space.
30+
space. This file is used by the Sample Problems page (linked from
31+
the PG Editor) to search through macros and sample problems.
3132
3233
=cut
3334

@@ -39,34 +40,35 @@ =head1 DESCRIPTION
3940
use Getopt::Long qw(:config bundling);
4041
use File::Find;
4142
use Mojo::JSON qw(encode_json);
42-
use Mojo::File qw(path curfile);
43+
use Mojo::File qw(curfile);
4344
use Pod::Simple::SimpleTree;
4445

4546
my $build = "all";
4647
my $pg_root = $ENV{PG_ROOT};
48+
4749
# These are the default sample problem directory and JSON file.
48-
my $dir = "tutorial/sample-problems";
49-
my $json_file = "htdocs/DATA/search.json";
50-
my $verbose = 0;
50+
my $sample_prob_dir = "tutorial/sample-problems";
51+
my $json_file = "htdocs/DATA/search.json";
52+
my $verbose = 0;
5153

5254
GetOptions(
5355
'p|pg-root=s' => \$pg_root,
5456
'f|json-file=s' => \$json_file,
55-
's|sample-prob-dir=s' => \$dir,
57+
's|sample-prob-dir=s' => \$sample_prob_dir,
5658
'b|build=s' => \$build,
5759
'v|verbose+' => \$verbose
5860
);
5961

6062
die "The build options must be one of (all, macros, samples). The value $build is not valid."
61-
if ((grep { $_ eq $build } qw/all macros samples/) == 0);
63+
if ((grep { $_ eq $build } qw/all macros samples/) != 1);
6264

6365
my $ww_root = $ENV{WW_ROOT};
6466
$ww_root = Mojo::File->new(curfile->dirname, "..", "..")->realpath unless defined($ww_root);
6567

6668
die "ww_root: $ww_root is not a directory" unless -d $ww_root;
6769

68-
$dir = "$pg_root/$dir";
69-
$json_file = path("$ww_root/$json_file");
70+
$sample_prob_dir = "$pg_root/$sample_prob_dir";
71+
$json_file = Mojo::File->new("$ww_root/$json_file");
7072

7173
my $json_dir = $json_file->dirname;
7274
$json_dir->make_path unless -d $json_dir;
@@ -76,29 +78,30 @@ =head1 DESCRIPTION
7678
say " pg-root: $pg_root";
7779
say " ww-root: $ww_root";
7880
say " build: $build";
79-
say " dir: $dir";
81+
say " dir: $sample_prob_dir";
8082
say " json_file: $json_file";
8183
}
8284

83-
#
84-
my $stop_words = {};
85-
8685
# Load the Stop Words File
8786
open(my $FH, '<:encoding(UTF-8)', "$ww_root/bin/dev_scripts/stop-words-en.txt") or do {
8887
warn qq{Could not open file "$ww_root/bin/dev_scripts/stop-words-en.txt": $!};
8988
};
90-
my @stop_words = <$FH>;
91-
chomp for (@stop_words);
89+
my @stop_words;
90+
for my $line (<$FH>) {
91+
chomp $line;
92+
next if $line =~ /^#/; # skip all lines starting with a #
93+
next if $line eq '';
94+
push(@stop_words, $line);
95+
}
96+
close $FH;
9297

9398
# Store all of the search info for each file as an array of hashrefs.
94-
my @search_terms;
95-
99+
my @files;
96100
my $index = 1; # set an index for each file.
97101

98102
sub processFile {
99103
return unless $_ =~ /\.pg$/;
100104
say "Processing $_" if $verbose;
101-
102105
my $filename = $_;
103106

104107
open(my $FH, '<:encoding(UTF-8)', $File::Find::name) or do {
@@ -150,7 +153,7 @@ sub processFile {
150153
}
151154
}
152155
push(
153-
@search_terms,
156+
@files,
154157
{
155158
filename => $filename,
156159
type => 'sample problem',
@@ -171,35 +174,32 @@ sub processLine {
171174

172175
my @words = ();
173176
for my $word (@split_line) {
174-
175-
# The following lines pull out some formating.
176-
$word =~ s/(PODLINK|PROBLINK)\('([\w.]+)'\)/$2/;
177-
$word =~ s/`(.*)`/$1/;
178-
$word =~ s/[.!,]$//;
179-
$word =~ s/[()\*\\\+\{\}]//g;
177+
$word =~ s/(PODLINK|PROBLINK)\('([\w.]+)'\)/$2/; # pull related macros and problems
178+
$word =~ s/`(.*)`/$1/; # remove ``
179+
$word =~ s/[.!,]$//; # remove punctuation
180+
$word =~ s/[()\*\\\+\{\}]//g; # remove other characters.
180181
$word = lc($word);
181182
next if $word =~ /\[|\]|\d|=/;
182183

183184
my @result = grep {/^${word}$/} @stop_words;
184185
push(@words, $word) unless @result;
185-
186186
}
187187
return @words;
188188
}
189189

190190
# Extract the text for a section from the given POD (preparsed) with a section header title
191191
sub extractPODNode {
192-
my ($root, $title) = @_;
192+
my ($filename, $root, $title) = @_;
193193
my @index = grep { ref($root->[$_]) eq 'ARRAY' && $root->[$_][2] =~ /$title/ } 0 .. scalar(@$root) - 1;
194194
if (@index == 0) {
195-
warn "The section named $title is not found in the POD.";
195+
warn "In $filename: The section named $title is not found in the POD.";
196196
return;
197197
}
198198
if (@index > 1) {
199-
warn "There were more than one section named $title in the POD.";
199+
warn "In $filename: There are more than one section named $title in the POD.";
200200
return;
201201
}
202-
# start at the index 2 and extract all text
202+
# start at index 2 and extract all text
203203
my $node = $root->[ $index[0] + 1 ];
204204
my $i = 2;
205205
my $str = "";
@@ -208,41 +208,49 @@ sub extractPODNode {
208208
$i++;
209209
} while ($i < scalar(@$node));
210210

211-
return $str;
211+
my @line = split(/\s*-+\s*/, $str);
212+
return (
213+
name => $line[0],
214+
description => $line[1],
215+
);
212216
}
213217

214-
sub processPODFile {
215-
my ($filename) = @_;
216-
my $parser = Pod::Simple::SimpleTree->new();
217-
my $root = $parser->parse_file("$filename")->root;
218+
# Parse the =head2 POD to extract names of methods.
219+
sub parseHead2 {
220+
my ($root) = @_;
221+
my @head2terms = grep { ref($_) eq 'ARRAY' && $_->[0] =~ /head2/ } @$root;
222+
return [ map { $_->[2] } @head2terms ];
223+
}
218224

219-
return {
220-
type => "macro",
221-
name => extractPODNode($root, "NAME") // '',
222-
description => [ processLine(extractPODNode($root, "DESCRIPTION") // '') ]
225+
# process a macro file's POD
226+
sub processMacro {
227+
return unless $_ =~ /\.pl$/;
228+
say "Processing $_" if $verbose;
229+
my $file = Mojo::File->new($File::Find::name);
230+
return if $file->dirname =~ /deprecated/;
231+
232+
my $parser = Pod::Simple::SimpleTree->new();
233+
my $root = $parser->parse_file("$file")->root;
234+
my %description = extractPODNode($file->basename, $root, "NAME");
235+
236+
my $macro_file = {
237+
type => 'macro',
238+
methods => parseHead2($root),
239+
filename => $file->basename,
240+
id => $index++,
241+
dir => $file->dirname->to_rel("$pg_root")->to_string,
242+
%description
223243
};
244+
push(@files, $macro_file);
224245
}
225246

226-
# Process the sample problems in $dir.
227-
228-
find({ wanted => \&processFile }, "$dir") if (grep { $build eq $_ } qw/all samples/);
247+
# Process the sample problems in $sample_prob_dir.
248+
find({ wanted => \&processFile }, "$sample_prob_dir") if (grep { $build eq $_ } qw/all samples/);
229249

230250
# Process the POD within the macros dir.
251+
find({ wanted => \&processMacro }, "$pg_root/macros") if (grep { $build eq $_ } qw/all macros/);
231252

232-
if (grep { $build eq $_ } qw/all macros/) {
233-
my $macro_dir = Mojo::File->new("$pg_root/macros/math");
234-
my $macros = $macro_dir->list->each(sub {
235-
say "processing " . $_->basename if $verbose;
236-
my $pod_file = processPODFile($_);
237-
$pod_file->{filename} = $_->basename;
238-
$pod_file->{id} = $index++;
239-
$pod_file->{dir} = $_->dirname->to_rel("$pg_root")->to_string;
240-
push(@search_terms, $pod_file);
241-
});
242-
}
243-
244-
my $json = encode_json \@search_terms;
253+
my $json = encode_json \@files;
245254

246255
say "Writing document info to $json_file" if $verbose;
247256
$json_file->spew($json);
248-

bin/dev_scripts/stop-words-en.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# This file contains stop words from https://github.com/Alir3z4/stop-words
2+
# These words will be ignored in the processing of macros and sample problems used by the script
3+
# bin/dev_scripts/build-search-db.pl
4+
15
'll
26
'tis
37
'twas

htdocs/DATA/search.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

htdocs/js/SampleProblemViewer/search.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
};
3434
}
3535

36-
const miniSearch = new MiniSearch({ fields: ['terms', 'filename', 'name', 'description'] });
36+
const miniSearch = new MiniSearch({ fields: ['terms', 'filename', 'name', 'description', 'methods'] });
3737
let pages;
3838
// This is the data from sample-problems/macros POD.
3939
fetch('../../DATA/search.json')
@@ -68,6 +68,7 @@
6868
<div class="card-body">
6969
<h5 class="card-title">
7070
<a href=\"/webwork2/${path}/${p.dir}/${file}\">${p.name}</a>
71+
(${p.type})
7172
</h5>
7273
<p class="card-text">${p.description}</p>
7374
</div>

0 commit comments

Comments
 (0)