Skip to content

Commit 8c8da2b

Browse files
authored
refactor: split huge parser to manageable instruction parsers (#6)
* refactor: break down huge parser to more manageable instruction parsers * chore: reformat code
1 parent d204fed commit 8c8da2b

30 files changed

+1620
-742
lines changed

src/main/java/com/github/jimschubert/rewrite/docker/internal/DockerfileParser.java

Lines changed: 109 additions & 721 deletions
Large diffs are not rendered by default.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Copyright (c) 2025 Jim Schubert
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
package com.github.jimschubert.rewrite.docker.internal;
16+
17+
import lombok.EqualsAndHashCode;
18+
import lombok.NoArgsConstructor;
19+
import lombok.RequiredArgsConstructor;
20+
import lombok.Value;
21+
import lombok.experimental.Accessors;
22+
23+
/**
24+
* Represents a Heredoc structure in the context of defining content using
25+
* a unique indicator, name, and optional redirection target.
26+
*
27+
* The Heredoc class provides a representation of a heredoc-style block with
28+
* key attributes common in configuration or scripting contexts.
29+
*
30+
* This class is immutable and uses fluent accessors for its properties.
31+
*/
32+
@Value
33+
@EqualsAndHashCode(callSuper = false)
34+
@RequiredArgsConstructor
35+
@NoArgsConstructor(force = true)
36+
@Accessors(fluent = true)
37+
public class Heredoc {
38+
String indicator;
39+
String name;
40+
String redirectionTo;
41+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Copyright (c) 2025 Jim Schubert
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
package com.github.jimschubert.rewrite.docker.internal;
16+
17+
import com.github.jimschubert.rewrite.docker.internal.parsers.*;
18+
19+
import java.util.List;
20+
21+
/**
22+
* Registry for instruction parsers.
23+
* This class is responsible for managing and providing access to different instruction parsers.
24+
*/
25+
public class InstructionParserRegistry implements ParserRegistry {
26+
27+
private final List<InstructionParser> parsers = List.of(
28+
new FromInstructionParser(),
29+
new RunInstructionParser(),
30+
new OnBuildInstructionParser(this),
31+
new AddInstructionParser(),
32+
new CmdInstructionParser(),
33+
new CommentParser(),
34+
new ArgInstructionParser(),
35+
new LabelInstructionParser(),
36+
new StopSignalInstructionParser(),
37+
new ExposeInstructionParser(),
38+
new MaintainerInstructionParser(),
39+
new HealthcheckInstructionParser(),
40+
new EnvInstructionParser(),
41+
new CopyInstructionParser(),
42+
new EntrypointInstructionParser(),
43+
new VolumeInstructionParser(),
44+
new WorkdirInstructionParser(),
45+
new ShellInstructionParser(),
46+
new UserInstructionParser(),
47+
new UnknownInstruction()
48+
);
49+
50+
51+
public InstructionParser getParserFor(String keyword) {
52+
return parsers.stream()
53+
.filter(p -> p.supports(keyword))
54+
.findFirst()
55+
.orElse(new UnknownInstruction());
56+
}
57+
}

src/main/java/com/github/jimschubert/rewrite/docker/internal/ParserConstants.java

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,23 @@ private ParserConstants() {
2727
static final String EMPTY = "";
2828
static final String COMMA = ",";
2929

30-
static final String SHELL = "SHELL";
31-
static final String ARG = "ARG";
32-
static final String FROM = "FROM";
33-
static final String MAINTAINER = "MAINTAINER";
34-
static final String RUN = "RUN";
35-
static final String CMD = "CMD";
36-
static final String ENTRYPOINT = "ENTRYPOINT";
37-
static final String ENV = "ENV";
38-
static final String ADD = "ADD";
39-
static final String COPY = "COPY";
40-
static final String VOLUME = "VOLUME";
41-
static final String EXPOSE = "EXPOSE";
42-
static final String USER = "USER";
43-
static final String WORKDIR = "WORKDIR";
44-
static final String LABEL = "LABEL";
45-
static final String STOPSIGNAL = "STOPSIGNAL";
46-
static final String HEALTHCHECK = "HEALTHCHECK";
47-
static final String ONBUILD = "ONBUILD";
48-
static final String COMMENT = "#";
30+
public static final String SHELL = "SHELL";
31+
public static final String ARG = "ARG";
32+
public static final String FROM = "FROM";
33+
public static final String MAINTAINER = "MAINTAINER";
34+
public static final String RUN = "RUN";
35+
public static final String CMD = "CMD";
36+
public static final String ENTRYPOINT = "ENTRYPOINT";
37+
public static final String ENV = "ENV";
38+
public static final String ADD = "ADD";
39+
public static final String COPY = "COPY";
40+
public static final String VOLUME = "VOLUME";
41+
public static final String EXPOSE = "EXPOSE";
42+
public static final String USER = "USER";
43+
public static final String WORKDIR = "WORKDIR";
44+
public static final String LABEL = "LABEL";
45+
public static final String STOPSIGNAL = "STOPSIGNAL";
46+
public static final String HEALTHCHECK = "HEALTHCHECK";
47+
public static final String ONBUILD = "ONBUILD";
48+
public static final String COMMENT = "#";
4949
}

src/main/java/com/github/jimschubert/rewrite/docker/internal/ParserState.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,14 @@ public class ParserState {
2727
private Space rightPadding = Space.EMPTY;
2828
private char escapeChar = '\\';
2929
private boolean isContinuation = false;
30+
private Heredoc heredoc = null;
3031

3132
public void reset() {
3233
prefix = Space.EMPTY;
3334
rightPadding = Space.EMPTY;
3435
escapeChar = '\\';
3536
isContinuation = false;
37+
heredoc = null;
3638
}
3739

3840
String getEscapeString() {
@@ -55,6 +57,7 @@ ParserState copy() {
5557
copy.rightPadding = this.rightPadding;
5658
copy.escapeChar = this.escapeChar;
5759
copy.isContinuation = this.isContinuation;
60+
copy.heredoc = this.heredoc;
5861
return copy;
5962
}
6063
}

src/main/java/com/github/jimschubert/rewrite/docker/internal/ParserUtils.java

Lines changed: 218 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,13 @@
1414
*/
1515
package com.github.jimschubert.rewrite.docker.internal;
1616

17-
import com.github.jimschubert.rewrite.docker.tree.Docker;
18-
import com.github.jimschubert.rewrite.docker.tree.Quoting;
17+
import com.github.jimschubert.rewrite.docker.tree.*;
18+
import org.openrewrite.internal.StringUtils;
19+
20+
import java.util.ArrayList;
21+
import java.util.HashSet;
22+
import java.util.List;
23+
import java.util.function.Function;
1924

2025
import static com.github.jimschubert.rewrite.docker.internal.ParserConstants.*;
2126

@@ -62,4 +67,215 @@ public static Docker.KeyArgs stringToKeyArgs(String s) {
6267
}
6368
return new Docker.KeyArgs(stringWithPadding.prefix(), Docker.Literal.build(key), Docker.Literal.build(value), EQUAL.equals(delim), q);
6469
}
70+
71+
public static <T> List<DockerRightPadded<T>> parseElements(String input, String delims, boolean appendRightPadding, ParserState state, Function<String, T> elementCreator) {
72+
List<DockerRightPadded<T>> elements = new ArrayList<>();
73+
StringBuilder currentElement = new StringBuilder();
74+
StringBuilder afterBuilder = new StringBuilder(); // queue up escaped newlines and whitespace as 'after' for previous element
75+
76+
// inCollectible is used to accumulate elements within surrounding characters like single/double quotes, parentheses, etc.
77+
boolean inCollectible = false;
78+
char doubleQuote = DOUBLE_QUOTE.charAt(0);
79+
char singleQuote = SINGLE_QUOTE.charAt(0);
80+
char bracketOpen = '(';
81+
char bracketClose = ')';
82+
char braceOpen = '{';
83+
char braceClose = '}';
84+
char squareBracketOpen = '[';
85+
char squareBracketClose = ']';
86+
char escape = state.escapeChar();
87+
char quote = 0;
88+
char lastChar = 0;
89+
char comma = ',';
90+
91+
// create a lookup of chars from delims
92+
HashSet<Character> delimSet = new HashSet<>();
93+
for (char c : delims.toCharArray()) {
94+
delimSet.add(c);
95+
}
96+
97+
boolean inHeredoc = false;
98+
99+
for (int i = 0; i < input.length(); i++) {
100+
char c = input.charAt(i);
101+
if (inCollectible) {
102+
if ((c == quote || c == bracketClose || c == braceClose || c == squareBracketClose) && lastChar != escape) {
103+
inCollectible = false;
104+
}
105+
currentElement.append(c);
106+
} else {
107+
if (delimSet.contains(c) && (lastChar != escape || (inHeredoc && c == '\n'))) {
108+
if (!StringUtils.isBlank(currentElement.toString())) {
109+
elements.add(DockerRightPadded.build(elementCreator.apply(currentElement.toString()))
110+
.withAfter(Space.EMPTY));
111+
currentElement.setLength(0);
112+
}
113+
// drop comma, assuming we are creating a list of elements
114+
if (c != comma) {
115+
currentElement.append(c);
116+
}
117+
} else {
118+
if (c == doubleQuote || c == singleQuote || c == bracketOpen || c == braceOpen || c == squareBracketOpen) {
119+
inCollectible = true;
120+
quote = c;
121+
}
122+
123+
Heredoc heredoc = state.heredoc();
124+
if (inHeredoc && !elements.isEmpty() && elements.get(elements.size() - 1).getElement() instanceof Docker.Literal) {
125+
Docker.Literal literal = (Docker.Literal) elements.get(elements.size() - 1).getElement();
126+
// allows commands to come after a heredoc. Does not support heredoc within a heredoc or multiple heredocs
127+
128+
if (literal.getText() != null && heredoc != null && literal.getText().equals(heredoc.name())) {
129+
inHeredoc = false;
130+
}
131+
}
132+
133+
// Check if within a heredoc and set escape character to '\n'
134+
if (heredoc != null && c == '\n' && !inHeredoc) {
135+
inHeredoc = true;
136+
afterBuilder.append(c);
137+
if (currentElement.length() > 0 && (
138+
currentElement.toString().endsWith(heredoc.indicator()) || (heredoc.redirectionTo() != null && currentElement.toString().endsWith(heredoc.redirectionTo())))) {
139+
elements.add(DockerRightPadded.build(elementCreator.apply(currentElement.toString()))
140+
.withAfter(Space.build(afterBuilder.toString())));
141+
currentElement.setLength(0);
142+
afterBuilder.setLength(0);
143+
}
144+
145+
lastChar = c;
146+
continue;
147+
} else //noinspection ConstantValue
148+
if (heredoc != null && c == '\n' && inHeredoc) {
149+
// IntelliJ incorrectly flags inHeredoc as a constant 'true', but it's obviously not.
150+
if (!currentElement.toString().endsWith(heredoc.indicator())) {
151+
afterBuilder.append(c);
152+
// this check allows us to accumulate "after" newlines and whitespace after for the last element
153+
if (currentElement.length() > 0) {
154+
elements.add(DockerRightPadded.build(elementCreator.apply(currentElement.toString()))
155+
.withAfter(Space.build(afterBuilder.toString())));
156+
currentElement.setLength(0);
157+
afterBuilder.setLength(0);
158+
}
159+
160+
lastChar = c;
161+
continue;
162+
}
163+
164+
// if we have a heredoc name, we are done with the heredoc
165+
inHeredoc = false;
166+
}
167+
168+
// "peek": if the current character is an escape and the next character is newline or carriage return, 'after' and advance
169+
int nextCharIndex = i + 1;
170+
if (c == escape && nextCharIndex < input.length() && (input.charAt(nextCharIndex) == '\n' || input.charAt(nextCharIndex) == '\r')) {
171+
// if we had already collected some whitespace (only whitespace), add it as 'after' to the last element
172+
if (StringUtils.isBlank(currentElement.toString())) {
173+
afterBuilder.append(currentElement);
174+
currentElement.setLength(0);
175+
}
176+
177+
char next = input.charAt(nextCharIndex);
178+
afterBuilder.append(escape).append(next);
179+
180+
// manually advance
181+
lastChar = next;
182+
i++;
183+
continue;
184+
}
185+
186+
// if 'after' builder is not empty and the character is whitespace, accumulate it
187+
if (afterBuilder.length() > 0 && (c == ' ' || c == '\t' || c == '\n')) {
188+
afterBuilder.append(c);
189+
lastChar = c;
190+
continue;
191+
}
192+
193+
// Drop escape character if it is followed by a space
194+
// other situations will retain the escape character
195+
if (lastChar == escape && c == ' ') {
196+
currentElement.setLength(currentElement.length() - 1);
197+
}
198+
199+
// no longer accumulating a prefix, add as "after" to the last element
200+
if (!elements.isEmpty() && afterBuilder.length() > 0) {
201+
int idx = elements.size() - 1;
202+
DockerRightPadded<T> element = elements.get(idx);
203+
elements.set(idx, element.withAfter(Space.append(element.getAfter(), Space.build(afterBuilder.toString()))));
204+
afterBuilder.setLength(0);
205+
}
206+
207+
// Only collect the current element if we're not "in a prefix" situation
208+
if (afterBuilder.length() == 0) {
209+
currentElement.append(c);
210+
}
211+
}
212+
}
213+
lastChar = c;
214+
}
215+
216+
if (currentElement.length() > 0) {
217+
// if it's whitespace only, add it as "after" to the last element
218+
if (StringUtils.isBlank(currentElement.toString())) {
219+
if (!elements.isEmpty()) {
220+
int idx = elements.size() - 1;
221+
elements.set(idx, elements.get(idx).withAfter(Space.build(currentElement.toString())));
222+
}
223+
} else {
224+
DockerRightPadded<T> element = DockerRightPadded.build(elementCreator.apply(currentElement.toString()));
225+
if (appendRightPadding) {
226+
element = element.withAfter(state.rightPadding());
227+
}
228+
elements.add(element);
229+
}
230+
}
231+
232+
if (afterBuilder.length() > 0) {
233+
int idx = elements.size() - 1;
234+
if (idx >= 0) {
235+
DockerRightPadded<T> element = elements.get(idx);
236+
elements.set(idx, element.withAfter(Space.append(element.getAfter(), Space.build(afterBuilder.toString()))));
237+
}
238+
}
239+
240+
return elements;
241+
}
242+
243+
public static List<DockerRightPadded<Docker.Port>> parsePorts(String input, ParserState state) {
244+
return parseElements(input, SPACE + TAB, true, state, ParserUtils::stringToPorts);
245+
}
246+
247+
public static List<DockerRightPadded<Docker.KeyArgs>> parseArgs(String input, ParserState state) {
248+
return parseElements(input, SPACE + TAB, true, state, ParserUtils::stringToKeyArgs);
249+
}
250+
251+
public static List<DockerRightPadded<Docker.Literal>> parseLiterals(String input, ParserState state) {
252+
return parseElements(input, SPACE, true, state, ParserUtils::createLiteral);
253+
}
254+
255+
public static List<DockerRightPadded<Docker.Literal>> parseLiterals(Form form, String input, ParserState state) {
256+
// appendRightPadding is true for shell form, false for exec form
257+
// exec form is a JSON array, so we need to parse it differently where right padding is after the ']'.
258+
return parseElements(input, form == Form.EXEC ? COMMA : SPACE, form == Form.SHELL, state, ParserUtils::createLiteral);
259+
}
260+
261+
public static Docker.Literal createLiteral(String s) {
262+
if (s == null || s.isEmpty()) {
263+
return null;
264+
}
265+
StringWithPadding stringWithPadding = StringWithPadding.of(s);
266+
String content = stringWithPadding.content();
267+
Quoting q = Quoting.UNQUOTED;
268+
if (content.startsWith(DOUBLE_QUOTE) && content.endsWith(DOUBLE_QUOTE)) {
269+
q = Quoting.DOUBLE_QUOTED;
270+
content = content.substring(1, content.length() - 1);
271+
} else if (content.startsWith(SINGLE_QUOTE) && content.endsWith(SINGLE_QUOTE)) {
272+
q = Quoting.SINGLE_QUOTED;
273+
content = content.substring(1, content.length() - 1);
274+
}
275+
return Docker.Literal.build(
276+
q, stringWithPadding.prefix(),
277+
content,
278+
stringWithPadding.suffix()
279+
);
280+
}
65281
}

0 commit comments

Comments
 (0)