Skip to content

Commit a8ebd2f

Browse files
committed
Also add Unicode support to HCL parser
1 parent cfa52c8 commit a8ebd2f

File tree

2 files changed

+70
-10
lines changed

2 files changed

+70
-10
lines changed

rewrite-hcl/src/main/java/org/openrewrite/hcl/internal/HclParserVisitor.java

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,14 @@ public class HclParserVisitor extends HCLParserBaseVisitor<Hcl> {
4848
@Nullable
4949
private final FileAttributes fileAttributes;
5050

51+
/**
52+
* Track position within the file by character
* <p>
* This is an index in Java {@code char} (UTF-16 code unit) units, which is the unit
* {@code source.substring(...)} is called with elsewhere in this class. A supplementary
* character such as an emoji occupies two chars, so this value can be larger than
* {@code codePointCursor} for the same position in the file.
53+
*/
5154
private int cursor = 0;
55+
/**
56+
* Track parsing position within the file by Unicode code point
* <p>
* This is the value that token start/stop indexes are compared against (see
* {@code prefix(Token)}). It is moved together with {@code cursor} by
* {@code advanceCursor(int)}.
57+
*/
58+
private int codePointCursor = 0;
5259

5360
public HclParserVisitor(Path path, String source, Charset charset, boolean charsetBomMarked, @Nullable FileAttributes fileAttributes) {
5461
this.path = path;
@@ -146,7 +153,7 @@ public Hcl visitBinaryOp(HCLParser.BinaryOpContext ctx) {
146153
break;
147154
}
148155
Space opPrefix = Space.format(prefix(ctx.binaryOperator()));
149-
cursor = ctx.binaryOperator().getStop().getStopIndex() + 1;
156+
advanceCursor(ctx.binaryOperator().getStop().getStopIndex() + 1);
150157

151158
if (c.unaryOp() != null) {
152159
right = (Expression) visit(c.operation() != null ? c.operation() : c.exprTerm(0));
@@ -380,7 +387,7 @@ private List<Expression> visitHeredocTemplateExpressions(List<HCLParser.HeredocT
380387
if (part.heredocLiteral() != null) {
381388
Space prefix = Space.format(prefix(part.heredocLiteral()));
382389
String value = part.heredocLiteral().getText();
383-
cursor = part.heredocLiteral().getStop().getStopIndex() + 1;
390+
advanceCursor(part.heredocLiteral().getStop().getStopIndex() + 1);
384391
expressions.add(new Hcl.Literal(randomId(), prefix, Markers.EMPTY, value, value));
385392
} else if (part.templateInterpolation() != null) {
386393
Space prefix = Space.format(prefix(part.templateInterpolation()));
@@ -623,7 +630,7 @@ private List<Expression> visitTemplateExpressions(List<HCLParser.QuotedTemplateP
623630
if (part.stringLiteral() != null) {
624631
Space prefix = Space.format(prefix(part.stringLiteral()));
625632
String value = part.stringLiteral().getText();
626-
cursor = part.stringLiteral().getStop().getStopIndex() + 1;
633+
advanceCursor(part.stringLiteral().getStop().getStopIndex() + 1);
627634
expressions.add(new Hcl.Literal(randomId(), prefix, Markers.EMPTY, value, value));
628635
} else if (part.templateInterpolation() != null) {
629636
Space prefix = Space.format(prefix(part.templateInterpolation()));
@@ -738,12 +745,12 @@ private String prefix(ParserRuleContext ctx) {
738745

739746
private String prefix(Token token) {
740747
int start = token.getStartIndex();
741-
if (start < cursor) {
748+
if (start < codePointCursor) {
742749
return "";
743750
}
744-
String prefix = source.substring(cursor, start);
745-
cursor = start;
746-
return prefix;
751+
int oldCursor = cursor;
752+
advanceCursor(start);
753+
return source.substring(oldCursor, cursor);
747754
}
748755

749756
private String prefix(@Nullable TerminalNode terminalNode) {
@@ -757,15 +764,15 @@ private String prefix(@Nullable TerminalNode terminalNode) {
757764

758765
T t = conversion.apply(ctx, prefix(ctx));
759766
if (ctx.getStop() != null) {
760-
cursor = ctx.getStop().getStopIndex() + (Character.isWhitespace(source.charAt(ctx.getStop().getStopIndex())) ? 0 : 1);
767+
advanceCursor(ctx.getStop().getStopIndex() + (Character.isWhitespace(source.charAt(ctx.getStop().getStopIndex())) ? 0 : 1));
761768
}
762769

763770
return t;
764771
}
765772

766773
private String skip(TerminalNode node) {
767774
String prefix = prefix(node);
768-
cursor = node.getSymbol().getStopIndex() + 1;
775+
advanceCursor(node.getSymbol().getStopIndex() + 1);
769776
return prefix;
770777
}
771778

@@ -785,7 +792,9 @@ private Space sourceBefore(String untilDelim, @Nullable Character stop) {
785792
}
786793

787794
String prefix = source.substring(cursor, delimIndex);
788-
cursor += prefix.length() + untilDelim.length(); // advance past the delimiter
795+
int codePointsInPrefix = prefix.codePointCount(0, prefix.length());
796+
// All HCL delimiters are ASCII, so length == code point count
797+
advanceCursor(codePointCursor + codePointsInPrefix + untilDelim.length());
789798
return Space.format(prefix);
790799
}
791800

@@ -832,4 +841,17 @@ private int positionOfNext(String untilDelim, @Nullable Character stop) {
832841

833842
return delimIndex > source.length() - untilDelim.length() ? -1 : delimIndex;
834843
}
844+
845+
/**
846+
* Advance both the cursor and the code point cursor
* <p>
* The target is given as a code point index. The char-based {@code cursor} is moved
* forward by the same number of code points using {@code String#offsetByCodePoints},
* so the two cursors keep referring to the same position in {@code source}.
* <p>
* The cursors only ever move forward: a target at or before the current
* {@code codePointCursor} leaves both cursors untouched.
*
* @param newCodePointIndex the code point index to advance to
* @return the value of the char-based {@code cursor} after the call
847+
*/
848+
@SuppressWarnings("UnusedReturnValue")
849+
private int advanceCursor(int newCodePointIndex) {
850+
// Never move backwards (or re-do work when already at the target).
if (newCodePointIndex <= codePointCursor) {
851+
return cursor;
852+
}
853+
// Translate the code point delta into a char offset starting from the current char cursor.
cursor = source.offsetByCodePoints(cursor, newCodePointIndex - codePointCursor);
854+
codePointCursor = newCodePointIndex;
855+
return cursor;
856+
}
835857
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* Copyright 2025 the original author or authors.
3+
* <p>
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
* <p>
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
* <p>
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.openrewrite.hcl;
17+
18+
import org.junit.jupiter.api.Test;
19+
import org.openrewrite.test.RewriteTest;
20+
21+
import static org.openrewrite.hcl.Assertions.hcl;
22+
23+
// Parser-level tests for HCL sources, driven through the RewriteTest harness.
public class HclParserTest implements RewriteTest {
24+
25+
// Feeds the parser an HCL snippet in which the 👇 emoji appears inside block comments
// and inside a quoted string. The emoji is a supplementary character (one code point,
// two Java chars), so positions after it differ between char and code point indexing.
// NOTE(review): presumably rewriteRun fails if the parsed source does not print back
// unchanged - confirm against RewriteTest.
@Test
26+
void unicode() {
27+
rewriteRun(
28+
hcl(
29+
"""
30+
tags = /*👇*/{
31+
git_file =/*👇*/ "terraform/aws/👇.tf"
32+
git_repo /*👇*/= "terragoat"
33+
}
34+
"""
35+
)
36+
);
37+
}
38+
}

0 commit comments

Comments
 (0)