Skip to content
This repository was archived by the owner on Nov 5, 2024. It is now read-only.

Commit 9145382

Browse files
committed
added patches and build scripts
1 parent a5b0641 commit 9145382

File tree

9 files changed

+424
-0
lines changed

9 files changed

+424
-0
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
# Composite action: clone ruby/master, patch its lexer to print tokens,
# build the standalone mri_tokenizer binary, and run the test suite.
name: build and test
description: build and test
inputs:
  base-ruby-version:
    # Ruby used to run the patch/test scripts — not the Ruby being built.
    description: base ruby version
    required: true
runs:
  using: composite
  steps:
    - name: install ruby
      uses: ruby/setup-ruby@v1
      with:
        ruby-version: ${{ inputs.base-ruby-version }}

    # Shallow clone: only the tip of ruby/master is needed to build.
    - name: clone ruby/master
      shell: bash
      run: git clone https://github.com/ruby/ruby.git --depth=1

    # Inserts the mri_tokenizer_print_token() hook before yylex's return.
    - name: patch parse.y
      env:
        PARSE_Y_PATH: ruby/parse.y
      shell: bash
      run: ruby patch-parse-y.rb

    - name: copy mri_tokenizer.c
      shell: bash
      run: cp mri_tokenizer.c ruby/

    - name: create Makefile
      shell: bash
      run: |
        cd ruby
        autoconf
        ./configure

    # Makes the custom mri_tokenizer targets available to make.
    - name: append mri_tokenizer.mk to Makefile
      shell: bash
      run: echo "include ../mri_tokenizer.mk" >> ruby/Makefile

    - name: build mri_tokenizer
      shell: bash
      run: |
        cd ruby
        make objs
        make mri_tokenizer

    # The runner locates the built binary via the MRI_TOKENIZER env var.
    - name: test mri_tokenizer
      env:
        MRI_TOKENIZER: ./ruby/mri_tokenizer
      shell: bash
      run: ruby test/runner.rb
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
# Release workflow: every push to master rebuilds the tokenizer and
# re-publishes the binary under the floating "latest" tag.
name: build-and-release

on:
  push:
    branches: [ master ]

jobs:
  build-and-release:
    name: build-and-release
    runs-on: ubuntu-latest
    steps:
      - name: checkout
        # checkout@v2 runs on the deprecated Node 12 runtime; v4 is the
        # currently supported release with an identical interface here.
        uses: actions/checkout@v4

      - name: build and test
        uses: ./.github/actions/build-and-test
        with:
          base-ruby-version: "3.0.0"

      - name: build release_notes.md
        run: |
          echo "Build date: $(date)" > release_notes.md

      - name: release
        uses: ncipollo/release-action@v1
        with:
          # allowUpdates lets the "latest" release be overwritten each push.
          allowUpdates: true
          artifactErrorsFailBuild: true
          artifacts: ruby/mri_tokenizer
          bodyFile: release_notes.md
          token: ${{ secrets.GITHUB_TOKEN }}
          tag: latest
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
# CI workflow: build the tokenizer and run its tests on every pull
# request targeting master.
name: build-and-test

on:
  pull_request:
    branches: [ master ]

jobs:
  build-and-test:
    name: build-and-test
    runs-on: ubuntu-latest
    steps:
      - name: checkout
        # checkout@v2 runs on the deprecated Node 12 runtime; v4 is the
        # currently supported release with an identical interface here.
        uses: actions/checkout@v4

      - name: build and test
        uses: ./.github/actions/build-and-test
        with:
          base-ruby-version: "3.0.0"

mri_tokenizer.c

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
/* Standalone tokenizer built on top of MRI's generated parser source. */
#undef RUBY_EXPORT
#include "ruby.h"
#include "vm_debug.h"
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif

/* Enables the hook that patch-parse-y.rb injects into yylex(). */
#define RUBY_PARSER_PRINT_TOKENS 1
/* Forward declaration so the injected call in parse.c resolves. */
void mri_tokenizer_print_token(void *token);
/* Include the generated parser source directly: this exposes its
 * file-static internals (yytname, YYTRANSLATE, ...) to this file. */
#include "parse.c"
#include <stdbool.h>

/* parse.c may define yydebug as a macro; drop it so the identifier can
 * be used as an ordinary function parameter below. */
#ifdef yydebug
#undef yydebug
#endif
void mri_tokenizer_print_token(void *token)
18+
{
19+
enum yytokentype *t = token;
20+
const char *token_name = yytname[YYTRANSLATE(*t)];
21+
22+
#define map_token(from, to) \
23+
if (strcmp(token_name, from) == 0) { \
24+
token_name = to; \
25+
}
26+
27+
// keywords
28+
map_token("\"`def'\"", "kDEF");
29+
map_token("\"`module'\"", "kMODULE");
30+
map_token("\"`def'\"", "kDEF");
31+
map_token("\"`undef'\"", "kUNDEF");
32+
map_token("\"`begin'\"", "kBEGIN");
33+
map_token("\"`rescue'\"", "kRESCUE");
34+
map_token("\"`ensure'\"", "kENSURE");
35+
map_token("\"`end'\"", "kEND");
36+
map_token("\"`if'\"", "kIF");
37+
map_token("\"`unless'\"", "kUNLESS");
38+
map_token("\"`then'\"", "kTHEN");
39+
map_token("\"`elsif'\"", "kELSIF");
40+
map_token("\"`else'\"", "kELSE");
41+
map_token("\"`case'\"", "kCASE");
42+
map_token("\"`when'\"", "kWHEN");
43+
map_token("\"`while'\"", "kWHILE");
44+
map_token("\"`until'\"", "kUNTIL");
45+
map_token("\"`for'\"", "kFOR");
46+
map_token("\"`break'\"", "kBREAK");
47+
map_token("\"`next'\"", "kNEXT");
48+
map_token("\"`redo'\"", "kREDO");
49+
map_token("\"`retry'\"", "kRETRY");
50+
map_token("\"`in'\"", "kIN");
51+
map_token("\"`do'\"", "kDO");
52+
map_token("\"`do'\" for condition", "kDO_COND");
53+
map_token("\"`do'\" for block", "kDO_BLOCK");
54+
map_token("\"`do'\" for lambda", "kDO_LAMBDA");
55+
map_token("\"`return'\"", "kRETURN");
56+
map_token("\"`yield'\"", "kYIELD");
57+
map_token("\"`super'\"", "kSUPER");
58+
map_token("\"`self'\"", "kSELF");
59+
map_token("\"`nil'\"", "kNIL");
60+
map_token("\"`true'\"", "kTRUE");
61+
map_token("\"`false'\"", "kFALSE");
62+
map_token("\"`and'\"", "kAND");
63+
map_token("\"`or'\"", "kOR");
64+
map_token("\"`not'\"", "kNOT");
65+
map_token("\"`if' modifier\"", "kIF_MOD");
66+
map_token("\"`unless' modifier\"", "kUNLESS_MOD");
67+
map_token("\"`while' modifier\"", "kWHILE_MOD");
68+
map_token("\"`until' modifier\"", "kUNTIL_MOD");
69+
map_token("\"`rescue' modifier\"", "kRESCUE_MOD");
70+
map_token("\"`alias'\"", "kALIAS");
71+
map_token("\"`defined?'\"", "kDEFINED");
72+
map_token("\"`BEGIN'\"", "klBEGIN");
73+
map_token("\"`END'\"", "klEND");
74+
map_token("\"`__LINE__'\"", "k__LINE__");
75+
map_token("\"`__FILE__'\"", "k__FILE__");
76+
map_token("\"`__ENCODING__'\"", "k__ENCODING__");
77+
78+
map_token("\"local variable or method\"", "tIDENTIFIER");
79+
map_token("\"method\"", "tFID");
80+
map_token("\"global variable\"", "tGVAR");
81+
map_token("\"instance variable\"", "tIVAR");
82+
map_token("\"constant\"", "tCONSTANT");
83+
map_token("\"class variable\"", "tCVAR");
84+
map_token("\"label\"", "tLABEL");
85+
map_token("\"integer literal\"", "tINTEGER");
86+
map_token("\"float literal\"", "tFLOAT");
87+
map_token("\"rational literal\"", "tRATIONAL");
88+
map_token("\"imaginary literal\"", "tIMAGINARY");
89+
map_token("\"char literal\"", "tCHAR");
90+
map_token("\"numbered reference\"", "tNTH_REF");
91+
map_token("\"back reference\"", "tBACK_REF");
92+
map_token("\"literal content\"", "tSTRING_CONTENT");
93+
map_token("tREGEXP_END", "tREGEXP_END");
94+
95+
map_token("'.'", "tDOT");
96+
map_token("\"backslash\"", "tBACKSLASH");
97+
map_token("\"escaped space\"", "tSP");
98+
map_token("\"escaped horizontal tab\"", "tSLASH_T");
99+
map_token("\"escaped form feed\"", "tSLASH_F");
100+
map_token("\"escaped carriage return\"", "tSLASH_R");
101+
map_token("\"escaped vertical tab\"", "tVTAB");
102+
map_token("\"unary+\"", "tUPLUS");
103+
map_token("\"unary-\"", "tUMINUS");
104+
map_token("\"**\"", "tPOW");
105+
map_token("\"<=>\"", "tCMP");
106+
map_token("\"==\"", "tEQ");
107+
map_token("\"===\"", "tEQQ");
108+
map_token("\"!=\"", "tNEQ");
109+
map_token("\">=\"", "tGEQ");
110+
map_token("\"<=\"", "tLEQ");
111+
map_token("\"&&\"", "tANDOP");
112+
map_token("\"||\"", "tOROP");
113+
map_token("\"=~\"", "tMATCH");
114+
map_token("\"!~\"", "tNMATCH");
115+
map_token("\"..\"", "tDOT2");
116+
map_token("\"...\"", "tDOT3");
117+
map_token("\"(..\"", "tBDOT2");
118+
map_token("\"(...\"", "tBDOT3");
119+
map_token("\"[]\"", "tAREF");
120+
map_token("\"[]=\"", "tASET");
121+
map_token("\"<<\"", "tLSHFT");
122+
map_token("\">>\"", "tRSHFT");
123+
map_token("\"&.\"", "tANDDOT");
124+
map_token("\"::\"", "tCOLON2");
125+
map_token("\":: at EXPR_BEG\"", "tCOLON3");
126+
map_token("\"operator-assignment\"", "tOP_ASGN");
127+
map_token("\"=>\"", "tASSOC");
128+
map_token("\"(\"", "tLPAREN");
129+
map_token("\"( arg\"", "tLPAREN_ARG");
130+
map_token("\")\"", "tRPAREN");
131+
map_token("\"[\"", "tLBRACK");
132+
map_token("\"{\"", "tLBRACE");
133+
map_token("\"{ arg\"", "tLBRACE_ARG");
134+
map_token("\"*\"", "tSTAR");
135+
map_token("\"**arg\"", "tDSTAR");
136+
map_token("\"&\"", "tAMPER");
137+
map_token("\"->\"", "tLAMBDA");
138+
map_token("\"symbol literal\"", "tSYMBEG");
139+
map_token("\"string literal\"", "tSTRING_BEG");
140+
map_token("\"backtick literal\"", "tXSTRING_BEG");
141+
map_token("\"regexp literal\"", "tREGEXP_BEG");
142+
map_token("\"word list\"", "tWORDS_BEG");
143+
map_token("\"verbatim word list\"", "tQWORDS_BEG");
144+
map_token("\"symbol list\"", "tSYMBOLS_BEG");
145+
map_token("\"verbatim symbol list\"", "tQSYMBOLS_BEG");
146+
map_token("\"terminator\"", "tSTRING_END");
147+
map_token("\"'}'\"", "tSTRING_DEND");
148+
map_token("tSTRING_DBEG", "tSTRING_DBEG");
149+
map_token("tSTRING_DVAR", "tSTRING_DVAR");
150+
map_token("tLAMBEG", "tLAMBEG");
151+
map_token("tLABEL_END", "tLABEL_END");
152+
// map_token("tLOWEST", "");
153+
map_token("'='", "tEQL");
154+
map_token("'?'", "tEH");
155+
map_token("':'", "tCOLON");
156+
map_token("'>'", "tGT");
157+
map_token("'<'", "tLT");
158+
map_token("'|'", "tPIPE");
159+
map_token("'^'", "tCARET");
160+
map_token("'&'", "tAMPER2");
161+
map_token("'+'", "tPLUS");
162+
map_token("'-'", "tMINUS");
163+
map_token("'*'", "tSTAR2");
164+
map_token("'/'", "tDIVIDE");
165+
map_token("'%'", "tPERCENT");
166+
map_token("tUMINUS_NUM", "tUMINUS_NUM");
167+
map_token("'!'", "tBANG");
168+
map_token("'~'", "tTILDE");
169+
// map_token("tLAST_TOKEN", "");
170+
map_token("'{'", "tLCURLY");
171+
map_token("'}'", "tRCURLY");
172+
map_token("'['", "tLBRACK2");
173+
map_token("','", "tCOMMA");
174+
map_token("'`'", "tBACK_REF2");
175+
map_token("'('", "tLPAREN2");
176+
map_token("')'", "tRPAREN");
177+
map_token("']'", "tRBRACK");
178+
map_token("';'", "tSEMI");
179+
map_token("' '", "tSPACE");
180+
map_token("'\\n'", "tNL");
181+
182+
map_token("\"end-of-input\"", "END_OF_INPUT");
183+
printf("%s\n", token_name);
184+
}
185+
186+
/* Parses `src` (reported as `filename` in diagnostics) with a fresh MRI
 * parser. The token stream appears on stdout as a side effect of the
 * mri_tokenizer_print_token() hook patched into yylex(); the resulting
 * AST itself is discarded. */
void __parse(const char *filename, const char *src, bool yydebug)
{
    VALUE parser = rb_parser_new();
    rb_parser_set_context(parser, NULL, FALSE);
    /* When yydebug is true, bison's verbose parser tracing is enabled
     * in addition to the token output. */
    rb_parser_set_yydebug(parser, RBOOL(yydebug));
    rb_parser_compile_string(parser, filename, rb_fstring_cstr(src), 0);
}
/* Reads the entire file at `filepath` into a freshly malloc'd,
 * NUL-terminated buffer. Returns NULL if the file cannot be opened,
 * its size cannot be determined, or allocation fails. The caller owns
 * the returned buffer and must free() it. */
char *__read_file(const char *filepath)
{
    FILE *f = fopen(filepath, "rb");
    if (!f) {
        return NULL;
    }
    fseek(f, 0, SEEK_END);
    long fsize = ftell(f);
    if (fsize < 0) {
        /* ftell failed (e.g. stream not seekable); the original code
         * would have passed the negative size straight to malloc. */
        fclose(f);
        return NULL;
    }
    fseek(f, 0, SEEK_SET);

    char *fcontent = malloc((size_t)fsize + 1);
    if (!fcontent) {
        /* Original dereferenced NULL on allocation failure. */
        fclose(f);
        return NULL;
    }
    /* Terminate after the bytes actually read, so a short read still
     * yields a valid C string instead of uninitialized tail bytes. */
    size_t nread = fread(fcontent, 1, (size_t)fsize, f);
    fclose(f);
    fcontent[nread] = 0;

    return fcontent;
}
/* Entry point. Usage: mri_tokenizer path/to/file.rb
 *
 * Boots a minimal Ruby VM, reads the given source file, and parses it;
 * the token stream is printed by mri_tokenizer_print_token() via the
 * hook patched into yylex(). Exits 1 on usage errors or unreadable
 * input, 0 otherwise. */
int
main(int argc, char **argv)
{
#ifdef HAVE_LOCALE_H
    setlocale(LC_CTYPE, "");
#endif

    ruby_sysinit(&argc, &argv);
    {
        RUBY_INIT_STACK;
        ruby_init();
        if (argc != 2) {
            printf("Usage: mri_tokenizer path/to/file.rb\n");
            return 1;
        }
        char *filepath = argv[1];
        char *code = __read_file(filepath);
        if (!code) {
            printf("Failed to read file '%s', aborting.\n", filepath);
            return 1;
        }
        /* yydebug disabled: emit only the token stream. */
        __parse(filepath, code, false);
        free(code);
        return 0;
    }
}

mri_tokenizer.mk

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
# The object file depends on the generated parser source, which
# mri_tokenizer.c pulls in via #include "parse.c".
mri_tokenizer.$(OBJEXT): parse.c

# Build process is similar to miniruby with the following changes:
# 1. parse.o is excluded from linking (but parse.c is still generated)
# 2. main.o is replaced with mri_tokenizer.o that includes parse.c
# This way we can get access to all structs and functions defined in parse.c in our mri_tokenizer.c
mri_tokenizer$(EXEEXT): config.status mri_tokenizer.$(OBJEXT) objs $(ARCHFILE)
	$(ECHO) linking $@
	$(CC) $(EXE_LDFLAGS) $(XLDFLAGS) mri_tokenizer.$(OBJEXT) $(MINIOBJS) $(filter-out parse.$(OBJEXT),$(COMMONOBJS)) $(MAINLIBS) $(LIBS) $(OUTFLAG)$@

patch-parse-y.rb

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
# frozen_string_literal: true
#
# Patches ruby's parse.y so that every token produced by yylex() is
# reported through mri_tokenizer_print_token() before being returned.
#
# Usage: PARSE_Y_PATH=path/to/parse.y ruby patch-parse-y.rb

# C snippet spliced in just above yylex's first return statement.
PATCH = <<-C
#ifdef RUBY_PARSER_PRINT_TOKENS
mri_tokenizer_print_token(&t);
#endif
C

# Appends PATCH to the line directly above the first `return` statement
# that follows the yylex() definition line. Mutates and returns +lines+.
# Calls Kernel#abort (message to $stderr, exit status 1) when either
# landmark cannot be found.
def insert_print_token_hook(lines)
  # 1. find definition of `yylex` function
  yylex_idx = lines.find_index { |line| line.start_with?('yylex(') }
  abort('Failed to find declaration of yylex function') if yylex_idx.nil?

  # 2. find its 'return' statement
  return_idx = yylex_idx
  loop do
    return_idx += 1
    line = lines[return_idx]
    abort('Failed to find return statement of yylex function') if line.nil?
    break if line.include?('return')
  end

  # 3. append "\n<patch>" to the line one above the return
  lines[return_idx - 1] += "\n#{PATCH}\n"
  lines
end

# Kernel#abort already prints to $stderr and exits 1, so no hand-rolled
# helper is needed (the original script shadowed it with an identical one).
parse_y_path = ENV.fetch('PARSE_Y_PATH') { abort('PARSE_Y_PATH env var must be provided') }
parse_y = File.read(parse_y_path).lines
insert_print_token_hook(parse_y)
File.write(parse_y_path, parse_y.join)

test/input

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
def m
  42 + "foo + #{bar}"
end

0 commit comments

Comments
 (0)