@@ -48,7 +48,14 @@ public class HclParserVisitor extends HCLParserBaseVisitor<Hcl> {
4848 @ Nullable
4949 private final FileAttributes fileAttributes ;
5050
51+ /**
52+ * Track parsing position within the file as a UTF-16 char index into {@code source} (the unit used by {@code substring})
53+ */
5154 private int cursor = 0 ;
55+ /**
56+ * Track parsing position within the file as a Unicode code point index (the unit token start/stop indices are compared against)
57+ */
58+ private int codePointCursor = 0 ;
5259
5360 public HclParserVisitor (Path path , String source , Charset charset , boolean charsetBomMarked , @ Nullable FileAttributes fileAttributes ) {
5461 this .path = path ;
@@ -146,7 +153,7 @@ public Hcl visitBinaryOp(HCLParser.BinaryOpContext ctx) {
146153 break ;
147154 }
148155 Space opPrefix = Space .format (prefix (ctx .binaryOperator ()));
149- cursor = ctx .binaryOperator ().getStop ().getStopIndex () + 1 ;
156+ advanceCursor ( ctx .binaryOperator ().getStop ().getStopIndex () + 1 ) ;
150157
151158 if (c .unaryOp () != null ) {
152159 right = (Expression ) visit (c .operation () != null ? c .operation () : c .exprTerm (0 ));
@@ -380,7 +387,7 @@ private List<Expression> visitHeredocTemplateExpressions(List<HCLParser.HeredocT
380387 if (part .heredocLiteral () != null ) {
381388 Space prefix = Space .format (prefix (part .heredocLiteral ()));
382389 String value = part .heredocLiteral ().getText ();
383- cursor = part .heredocLiteral ().getStop ().getStopIndex () + 1 ;
390+ advanceCursor ( part .heredocLiteral ().getStop ().getStopIndex () + 1 ) ;
384391 expressions .add (new Hcl .Literal (randomId (), prefix , Markers .EMPTY , value , value ));
385392 } else if (part .templateInterpolation () != null ) {
386393 Space prefix = Space .format (prefix (part .templateInterpolation ()));
@@ -623,7 +630,7 @@ private List<Expression> visitTemplateExpressions(List<HCLParser.QuotedTemplateP
623630 if (part .stringLiteral () != null ) {
624631 Space prefix = Space .format (prefix (part .stringLiteral ()));
625632 String value = part .stringLiteral ().getText ();
626- cursor = part .stringLiteral ().getStop ().getStopIndex () + 1 ;
633+ advanceCursor ( part .stringLiteral ().getStop ().getStopIndex () + 1 ) ;
627634 expressions .add (new Hcl .Literal (randomId (), prefix , Markers .EMPTY , value , value ));
628635 } else if (part .templateInterpolation () != null ) {
629636 Space prefix = Space .format (prefix (part .templateInterpolation ()));
@@ -738,12 +745,12 @@ private String prefix(ParserRuleContext ctx) {
738745
739746 private String prefix (Token token ) { // returns the source text between the current position and the token's start
740747 int start = token .getStartIndex (); // start index reported by the token
741- if (start < cursor ) {
748+ if (start < codePointCursor ) { // token begins before the current code point position: nothing to return
742749 return "" ;
743750 }
744- String prefix = source . substring ( cursor , start ) ;
745- cursor = start ;
746- return prefix ;
751+ int oldCursor = cursor ; // remember the char offset where the prefix begins
752+ advanceCursor ( start ) ; // move both cursors up to the token start
753+ return source . substring ( oldCursor , cursor ) ; // text between the old and the new char offsets
747754 }
748755
749756 private String prefix (@ Nullable TerminalNode terminalNode ) {
@@ -757,15 +764,15 @@ private String prefix(@Nullable TerminalNode terminalNode) {
757764
758765 T t = conversion .apply (ctx , prefix (ctx ));
759766 if (ctx .getStop () != null ) {
760- cursor = ctx .getStop ().getStopIndex () + (Character .isWhitespace (source .charAt (ctx .getStop ().getStopIndex ())) ? 0 : 1 );
767+ advanceCursor ( ctx .getStop ().getStopIndex () + (Character .isWhitespace (source .charAt (ctx .getStop ().getStopIndex ())) ? 0 : 1 ) );
761768 }
762769
763770 return t ;
764771 }
765772
766773 private String skip (TerminalNode node ) { // returns the node's prefix and moves the position past the node
767774 String prefix = prefix (node ); // presumably the text preceding the node; body of prefix(TerminalNode) is not shown here
768- cursor = node .getSymbol ().getStopIndex () + 1 ;
775+ advanceCursor ( node .getSymbol ().getStopIndex () + 1 ) ; // target is one past the stop index of the node's symbol
769776 return prefix ;
770777 }
771778
@@ -785,7 +792,9 @@ private Space sourceBefore(String untilDelim, @Nullable Character stop) {
785792 }
786793
787794 String prefix = source .substring (cursor , delimIndex );
788- cursor += prefix .length () + untilDelim .length (); // advance past the delimiter
795+ int codePointsInPrefix = prefix .codePointCount (0 , prefix .length ());
796+ // All HCL delimiters are ASCII, so length == code point count
797+ advanceCursor (codePointCursor + codePointsInPrefix + untilDelim .length ());
789798 return Space .format (prefix );
790799 }
791800
@@ -832,4 +841,17 @@ private int positionOfNext(String untilDelim, @Nullable Character stop) {
832841
833842 return delimIndex > source .length () - untilDelim .length () ? -1 : delimIndex ;
834843 }
844+
845+ /**
846+ * Advance both cursors to the given code point index: {@code cursor} is moved to the matching char offset in {@code source}; a target at or before the current position is ignored. Returns the resulting {@code cursor}.
847+ */
848+ @ SuppressWarnings ("UnusedReturnValue" )
849+ private int advanceCursor (int newCodePointIndex ) {
850+ if (newCodePointIndex <= codePointCursor ) { // never move backward
851+ return cursor ;
852+ }
853+ cursor = source .offsetByCodePoints (cursor , newCodePointIndex - codePointCursor ); // char offset lying that many code points ahead of the current one
854+ codePointCursor = newCodePointIndex ;
855+ return cursor ;
856+ }
835857}
0 commit comments