forked from MoganLab/mogan
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkeyword_parser.cpp
More file actions
123 lines (111 loc) · 4.03 KB
/
keyword_parser.cpp
File metadata and controls
123 lines (111 loc) · 4.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/******************************************************************************
* MODULE : keyword_parser.cpp
* DESCRIPTION: shared keyword parsing routines
* COPYRIGHT : (C) 2020 Darcy Shen
*******************************************************************************
* This software falls under the GNU general public license version 3 or later.
* It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
* in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
******************************************************************************/
#include "keyword_parser.hpp"
#include "analyze.hpp"
#include "converter.hpp"
#include "iterator.hpp"
#include "scheme.hpp"
#include "tree.hpp"
#include "tree_helper.hpp"
// Build a parser with an empty keyword table, no extra or start characters,
// no pending keyword, and path-boundary checking switched off.
keyword_parser_rep::keyword_parser_rep () {
  check_path_boundaries= false;
  start_chars          = array<char> ();
  extra_chars          = array<char> ();
  keyword_group        = hashmap<string, string> ();
  current_keyword      = "";
}
// Append start_char to start_chars. That array is passed to read_keyword as
// its `starts` argument and is also consulted by can_parse when it inspects
// the character preceding a candidate keyword.
void
keyword_parser_rep::insert_start_char (char start_char) {
start_chars << start_char;
}
// Append extra_char to extra_chars. That array is passed to read_keyword as
// its `extras` argument and is also consulted by can_parse when it inspects
// the character preceding a candidate keyword.
void
keyword_parser_rep::insert_extra_char (char extra_char) {
extra_chars << extra_char;
}
// Read the longest run of keyword characters of s beginning at index i.
//
//   s      : text being scanned
//   i      : in  - index at which scanning starts
//            out - index just past the last consumed character (left
//                  unchanged when nothing could be read)
//   result : receives the consumed slice s (old i, new i)
//   extras : additional characters accepted in a keyword
//   starts : additional characters accepted in a keyword, intended as
//            keyword starters
//
// Returns true iff at least one character was consumed.
bool
read_keyword (string s, int& i, string& result, array<char> extras,
              array<char> starts) {
  int opos= i;
  int s_N = N (s);

  // A keyword starts with an alphabetic character, an extra character or a
  // start character. Each predicate is applied to s[i] itself; is_alpha must
  // receive the character, not the boolean value of a combined || test.
  //
  // NOTE(review): the loop below accepts every class tested here plus digits,
  // so a leading digit is still consumed by the loop. Rejecting such input
  // outright would change what callers observe and is left for a separate
  // decision.
  if (i < s_N && (is_alpha (s[i]) || contains (s[i], extras) ||
                  contains (s[i], starts)))
    i++;

  // The rest of a keyword consists of alphabetic characters, digits, extra
  // characters and start characters.
  while (i < s_N && (is_alpha (s[i]) || is_digit (s[i]) ||
                     contains (s[i], extras) || contains (s[i], starts))) {
    i++;
  }

  result= s (opos, i);
  return i > opos;
}
// Decide whether a known keyword starts at position pos of s.
//
// On success the keyword is remembered in current_keyword and true is
// returned. pos is taken by value, so the caller's position is not moved.
//
// When check_path_boundaries is set, a candidate is rejected if it is glued
// to neighbouring text: the preceding character must not be a word character
// (alphabetic, digit, extra or start character), and neither neighbour may be
// one of the separators listed in is_path_glue.
bool
keyword_parser_rep::can_parse (string s, int pos) {
  // Separators that commonly join words in paths ('.', '/', '\\'), e-mail or
  // git user names ('@'), URLs or Windows drives (':') and file names
  // ('-', '_').
  auto is_path_glue= [] (char c) {
    return c == '.' || c == '/' || c == '\\' || c == '@' || c == ':' ||
           c == '-' || c == '_';
  };

  // Left boundary: look at the character just before the candidate.
  if (check_path_boundaries && pos > 0) {
    char before     = s[pos - 1];
    bool inside_word= is_alpha (before) || is_digit (before) ||
                      contains (before, extra_chars) ||
                      contains (before, start_chars);
    if (inside_word || is_path_glue (before)) return false;
  }

  // Read a candidate word; read_keyword moves the local pos just past it.
  string candidate;
  if (!read_keyword (s, pos, candidate, extra_chars, start_chars)) return false;
  if (!keyword_group->contains (candidate)) return false;

  // Right boundary: look at the character just after the candidate.
  if (check_path_boundaries && pos < N (s)) {
    char after= s[pos];
    if (is_path_glue (after)) return false;
  }

  current_keyword= candidate;
  return true;
}
// Advance pos by the length of current_keyword, i.e. the keyword stored by
// the last successful can_parse. The text s itself is not inspected.
void
keyword_parser_rep::do_parse (string s, int& pos) {
  int keyword_len= N (current_keyword);
  pos            = pos + keyword_len;
}
// Load the keyword table of a language from Scheme.
//
// Evaluates "(use-modules (prog <lang_code>-lang))", then evaluates
// "(<lang_code>-keywords)" mapped through tm->tree. Each resulting tree is
// treated as one group: its label is the group name and the labels of its
// children are the keywords, which are registered through put.
void
keyword_parser_rep::use_keywords_of_lang (string lang_code) {
  string load_module_cmd= "(use-modules (prog " * lang_code * "-lang))";
  eval (load_module_cmd);

  string fetch_keywords_cmd= "(map tm->tree (" * lang_code * "-keywords))";
  list<tree> groups= as_list_tree (eval (fetch_keywords_cmd));

  if (DEBUG_PARSER)
    debug_packrat << "Keywords definition of [" << lang_code << "] loaded!\n";

  for (int g= 0, groups_N= N (groups); g < groups_N; g++) {
    tree   members   = groups[g];
    string group_name= get_label (members);
    for (int k= 0, members_N= N (members); k < members_N; k++) {
      // Keywords are stored after utf8_to_cork conversion; for instance
      // number->string is actually kept as number-<gtr>string.
      put (utf8_to_cork (get_label (members[k])), group_name);
    }
  }
}