Skip to content

Commit 8fac44a

Browse files
committed
Parser: Treat javascript_tag content as foreign content
1 parent a25dec0 commit 8fac44a

File tree

34 files changed

+1015
-22
lines changed

34 files changed

+1015
-22
lines changed

ext/herb/extension.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) {
156156
if (NIL_P(prism_program)) { prism_program = rb_hash_lookup(options, ID2SYM(rb_intern("prism_program"))); }
157157
if (!NIL_P(prism_program) && RTEST(prism_program)) { parser_options.prism_program = true; }
158158

159+
VALUE html = rb_hash_lookup(options, rb_utf8_str_new_cstr("html"));
160+
if (NIL_P(html)) { html = rb_hash_lookup(options, ID2SYM(rb_intern("html"))); }
161+
if (!NIL_P(html) && !RTEST(html)) { parser_options.html = false; }
162+
159163
VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats"));
160164
if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); }
161165
if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; }

java/herb_jni.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,14 @@ Java_org_herb_Herb_parse(JNIEnv* env, jclass clazz, jstring source, jobject opti
108108
jboolean prismProgram = (*env)->CallBooleanMethod(env, options, getPrismProgram);
109109
parser_options.prism_program = (prismProgram == JNI_TRUE);
110110
}
111+
112+
jmethodID getHtml =
113+
(*env)->GetMethodID(env, optionsClass, "isHtml", "()Z");
114+
115+
if (getHtml != NULL) {
116+
jboolean html = (*env)->CallBooleanMethod(env, options, getHtml);
117+
parser_options.html = (html == JNI_TRUE);
118+
}
111119
}
112120

113121
hb_allocator_T allocator;

java/org/herb/ParserOptions.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ public class ParserOptions {
1010
private boolean prismNodes = false;
1111
private boolean prismNodesDeep = false;
1212
private boolean prismProgram = false;
13+
private boolean html = true;
1314

1415
public ParserOptions() {}
1516

@@ -94,6 +95,15 @@ public boolean isPrismProgram() {
9495
return prismProgram;
9596
}
9697

98+
public ParserOptions html(boolean value) {
99+
this.html = value;
100+
return this;
101+
}
102+
103+
public boolean isHtml() {
104+
return html;
105+
}
106+
97107
public static ParserOptions create() {
98108
return new ParserOptions();
99109
}

javascript/packages/core/src/parser-options.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export interface ParseOptions {
88
prism_nodes?: boolean
99
prism_nodes_deep?: boolean
1010
prism_program?: boolean
11+
html?: boolean
1112
}
1213

1314
export type SerializedParserOptions = Required<ParseOptions>
@@ -22,6 +23,7 @@ export const DEFAULT_PARSER_OPTIONS: SerializedParserOptions = {
2223
prism_nodes: false,
2324
prism_nodes_deep: false,
2425
prism_program: false,
26+
html: true,
2527
}
2628

2729
/**
@@ -55,6 +57,9 @@ export class ParserOptions {
5557
/** Whether the full Prism ProgramNode was serialized on the DocumentNode. */
5658
readonly prism_program: boolean
5759

60+
/** Whether HTML tag parsing is enabled during parsing. When false, HTML-like content is treated as literal text. */
61+
readonly html: boolean
62+
5863
static from(options: SerializedParserOptions): ParserOptions {
5964
return new ParserOptions(options)
6065
}
@@ -69,5 +74,6 @@ export class ParserOptions {
6974
this.prism_nodes = options.prism_nodes ?? DEFAULT_PARSER_OPTIONS.prism_nodes
7075
this.prism_nodes_deep = options.prism_nodes_deep ?? DEFAULT_PARSER_OPTIONS.prism_nodes_deep
7176
this.prism_program = options.prism_program ?? DEFAULT_PARSER_OPTIONS.prism_program
77+
this.html = options.html ?? DEFAULT_PARSER_OPTIONS.html
7278
}
7379
}

javascript/packages/linter/test/parse-cache.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ describe("ParseCache", () => {
7575
strict: true,
7676
strict_locals: false,
7777
action_view_helpers: false,
78+
html: true,
7879
})
7980
})
8081

@@ -92,6 +93,7 @@ describe("ParseCache", () => {
9293
strict: false,
9394
strict_locals: false,
9495
action_view_helpers: false,
96+
html: true,
9597
})
9698
})
9799

@@ -109,6 +111,7 @@ describe("ParseCache", () => {
109111
strict: true,
110112
strict_locals: false,
111113
action_view_helpers: false,
114+
html: true,
112115
})
113116
})
114117

@@ -126,6 +129,7 @@ describe("ParseCache", () => {
126129
strict: false,
127130
strict_locals: false,
128131
action_view_helpers: false,
132+
html: true,
129133
})
130134
})
131135
})

playground/index.html

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,17 @@
490490
/>
491491
<span class="select-none">Prism nodes (deep)</span>
492492
</label>
493+
494+
<label class="flex items-center gap-1.5 text-gray-300 text-sm" title="Enable HTML tag parsing — uncheck to treat all HTML-like content as literal text (ERB is still parsed)">
495+
<input
496+
type="checkbox"
497+
data-option="html"
498+
data-action="change->playground#onOptionChange"
499+
class="rounded border-gray-600 text-green-600 focus:ring-green-500 bg-gray-700"
500+
checked
501+
/>
502+
<span class="select-none">HTML</span>
503+
</label>
493504
</div>
494505

495506
<pre

playground/src/controllers/playground_controller.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,7 @@ export default class extends Controller {
12711271
prism_program: false,
12721272
prism_nodes: false,
12731273
prism_nodes_deep: false,
1274+
html: true,
12741275
}
12751276

12761277
const nonDefaultOptions = {}

rust/src/herb.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ pub struct ParserOptions {
1414
pub prism_nodes: bool,
1515
pub prism_nodes_deep: bool,
1616
pub prism_program: bool,
17+
pub html: bool,
1718
}
1819

1920
impl Default for ParserOptions {
@@ -28,6 +29,7 @@ impl Default for ParserOptions {
2829
prism_nodes: false,
2930
prism_nodes_deep: false,
3031
prism_program: false,
32+
html: true,
3133
}
3234
}
3335
}
@@ -109,6 +111,9 @@ pub fn parse_with_options(source: &str, options: &ParserOptions) -> Result<Parse
109111
prism_program: options.prism_program,
110112
prism_nodes: options.prism_nodes,
111113
prism_nodes_deep: options.prism_nodes_deep,
114+
html: options.html,
115+
start_line: 0,
116+
start_column: 0,
112117
};
113118

114119
let ast = crate::ffi::herb_parse(c_source.as_ptr(), &c_parser_options, &mut allocator);

src/analyze/action_view/tag_helper_node_builders.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ token_T* create_synthetic_token(
2626
if (value) {
2727
size_t length = strlen(value);
2828
char* copied = hb_allocator_strndup(allocator, value, length);
29-
token->value = (hb_string_T) { .data = copied, .length = (uint32_t) length };
29+
token->value = hb_string_from_data(copied, length);
3030
} else {
3131
token->value = HB_STRING_EMPTY;
3232
}

src/analyze/action_view/tag_helpers.c

Lines changed: 132 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
#include "../../include/analyze/action_view/tag_helper_node_builders.h"
55
#include "../../include/analyze/analyze.h"
66
#include "../../include/ast_nodes.h"
7+
#include "../../include/herb.h"
78
#include "../../include/html_util.h"
89
#include "../../include/parser_helpers.h"
910
#include "../../include/position.h"
11+
#include "../../include/util.h"
1012
#include "../../include/util/hb_allocator.h"
1113
#include "../../include/util/hb_array.h"
1214
#include "../../include/util/hb_string.h"
@@ -580,6 +582,42 @@ static hb_array_T* transform_javascript_include_tag_multi_source(
580582
return elements;
581583
}
582584

585+
static bool erb_content_is_end_keyword(hb_string_T content) {
586+
const char* start = content.data;
587+
const char* end = content.data + content.length;
588+
589+
while (start < end && is_whitespace(*start)) {
590+
start++;
591+
}
592+
593+
while (end > start && is_whitespace(*(end - 1))) {
594+
end--;
595+
}
596+
597+
return (size_t) (end - start) == 3 && start[0] == 'e' && start[1] == 'n' && start[2] == 'd';
598+
}
599+
600+
static AST_ERB_CONTENT_NODE_T* find_swallowed_erb_end_node(hb_array_T* nodes) {
601+
if (!nodes) { return NULL; }
602+
603+
for (size_t i = 0; i < hb_array_size(nodes); i++) {
604+
AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i);
605+
if (!node) { continue; }
606+
607+
if (node->type == AST_ERB_CONTENT_NODE) {
608+
AST_ERB_CONTENT_NODE_T* erb = (AST_ERB_CONTENT_NODE_T*) node;
609+
if (erb->content && erb_content_is_end_keyword(erb->content->value)) { return erb; }
610+
}
611+
612+
if (node->type == AST_HTML_OPEN_TAG_NODE) {
613+
AST_ERB_CONTENT_NODE_T* found = find_swallowed_erb_end_node(((AST_HTML_OPEN_TAG_NODE_T*) node)->children);
614+
if (found) { return found; }
615+
}
616+
}
617+
618+
return NULL;
619+
}
620+
583621
static AST_NODE_T* transform_erb_block_to_tag_helper(
584622
AST_ERB_BLOCK_NODE_T* block_node,
585623
analyze_ruby_context_T* context,
@@ -600,6 +638,7 @@ static AST_NODE_T* transform_erb_block_to_tag_helper(
600638
);
601639

602640
hb_array_T* attributes = NULL;
641+
603642
if (parse_context->info->call_node) {
604643
attributes = extract_html_attributes_from_call_node(
605644
parse_context->info->call_node,
@@ -685,25 +724,51 @@ static AST_NODE_T* transform_erb_block_to_tag_helper(
685724

686725
hb_array_T* body = block_node->body ? block_node->body : hb_array_init(0, allocator);
687726
AST_NODE_T* close_tag = (AST_NODE_T*) block_node->end_node;
727+
position_T element_end = block_node->base.location.end;
688728

689729
if (tag_name && parser_is_foreign_content_tag(hb_string_from_c_string(tag_name)) && context->source
690730
&& block_node->body && hb_array_size(block_node->body) > 0) {
691731
size_t start_offset = block_node->tag_closing->range.to;
692-
size_t end_offset = block_node->end_node->tag_opening->range.from;
732+
size_t end_offset = 0;
733+
734+
if (block_node->end_node && block_node->end_node->tag_opening) {
735+
end_offset = block_node->end_node->tag_opening->range.from;
736+
} else {
737+
AST_ERB_CONTENT_NODE_T* swallowed_end = find_swallowed_erb_end_node(block_node->body);
738+
739+
if (swallowed_end && swallowed_end->tag_opening) {
740+
end_offset = swallowed_end->tag_opening->range.from;
741+
742+
AST_ERB_END_NODE_T* end_node = ast_erb_end_node_init(
743+
swallowed_end->tag_opening,
744+
swallowed_end->content,
745+
swallowed_end->tag_closing,
746+
swallowed_end->base.location.start,
747+
swallowed_end->base.location.end,
748+
hb_array_init(0, allocator),
749+
allocator
750+
);
751+
752+
close_tag = (AST_NODE_T*) end_node;
753+
element_end = close_tag->location.end;
754+
}
755+
}
693756

694757
if (end_offset > start_offset) {
695758
position_T body_start = block_node->tag_closing->location.end;
696-
position_T body_end = block_node->end_node->tag_opening->location.start;
697759

698760
size_t content_length = end_offset - start_offset;
699761
char* raw_copy = hb_allocator_strndup(allocator, context->source + start_offset, content_length);
700-
hb_string_T raw_content = { .data = raw_copy, .length = (uint32_t) content_length };
701762

702-
AST_LITERAL_NODE_T* literal_node =
703-
ast_literal_node_init(raw_content, body_start, body_end, hb_array_init(0, allocator), allocator);
763+
parser_options_T body_options = HERB_DEFAULT_PARSER_OPTIONS;
764+
body_options.html = false;
765+
body_options.analyze = false;
766+
body_options.strict = false;
767+
body_options.start_line = body_start.line;
768+
body_options.start_column = body_start.column;
704769

705-
body = hb_array_init(1, allocator);
706-
hb_array_append(body, literal_node);
770+
AST_DOCUMENT_NODE_T* body_document = herb_parse(raw_copy, &body_options, allocator);
771+
body = body_document->children;
707772
}
708773
}
709774

@@ -715,7 +780,7 @@ static AST_NODE_T* transform_erb_block_to_tag_helper(
715780
false,
716781
parse_context->matched_handler->source,
717782
block_node->base.location.start,
718-
block_node->base.location.end,
783+
element_end,
719784
hb_array_init(0, allocator),
720785
allocator
721786
);
@@ -1055,7 +1120,65 @@ void transform_tag_helper_blocks(const AST_NODE_T* node, analyze_ruby_context_T*
10551120
}
10561121
}
10571122

1058-
if (replacement) { hb_array_set(array, i, replacement); }
1123+
if (replacement) {
1124+
position_T replacement_end = replacement->location.end;
1125+
position_T original_end = child->location.end;
1126+
bool has_trailing = replacement_end.line != original_end.line || replacement_end.column != original_end.column;
1127+
1128+
if (has_trailing && context->source && child->type == AST_ERB_BLOCK_NODE) {
1129+
AST_HTML_ELEMENT_NODE_T* element = (AST_HTML_ELEMENT_NODE_T*) replacement;
1130+
1131+
if (element->close_tag && element->close_tag->type == AST_ERB_END_NODE) {
1132+
AST_ERB_END_NODE_T* close_erb = (AST_ERB_END_NODE_T*) element->close_tag;
1133+
size_t trailing_start = close_erb->tag_closing->range.to;
1134+
size_t source_length = strlen(context->source);
1135+
size_t trailing_end = trailing_start;
1136+
1137+
while (trailing_end < source_length) {
1138+
position_T position = position_from_source_with_offset(context->source, trailing_end);
1139+
1140+
if (position.line > original_end.line
1141+
|| (position.line == original_end.line && position.column >= original_end.column)) {
1142+
break;
1143+
}
1144+
1145+
trailing_end++;
1146+
}
1147+
1148+
if (trailing_end > trailing_start) {
1149+
hb_string_T trailing_content =
1150+
hb_string_from_data(context->source + trailing_start, trailing_end - trailing_start);
1151+
AST_HTML_TEXT_NODE_T* trailing_text = ast_html_text_node_init(
1152+
trailing_content,
1153+
replacement_end,
1154+
original_end,
1155+
hb_array_init(0, context->allocator),
1156+
context->allocator
1157+
);
1158+
1159+
size_t old_size = hb_array_size(array);
1160+
hb_array_T* new_array = hb_array_init(old_size + 1, context->allocator);
1161+
1162+
for (size_t j = 0; j < old_size; j++) {
1163+
if (j == i) {
1164+
hb_array_append(new_array, replacement);
1165+
hb_array_append(new_array, trailing_text);
1166+
} else {
1167+
hb_array_append(new_array, hb_array_get(array, j));
1168+
}
1169+
}
1170+
1171+
array->items = new_array->items;
1172+
array->size = new_array->size;
1173+
array->capacity = new_array->capacity;
1174+
i++;
1175+
continue;
1176+
}
1177+
}
1178+
}
1179+
1180+
hb_array_set(array, i, replacement);
1181+
}
10591182
}
10601183
}
10611184

0 commit comments

Comments
 (0)