testcontainers
diff --git a/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptScanner.java b/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptScanner.java
Lines changed: 151 additions & 0 deletions
diff --git a/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptSplitter.java b/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptSplitter.java
Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,151 @@
+package org.testcontainers.ext;
+
+import lombok.Getter;
+import lombok.RequiredArgsConstructor;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Rough lexical parser for SQL scripts.
+ * <p>
+ * The scanner walks over the script from left to right. Each call to {@link #next()}
+ * consumes one lexem, stores its text in {@code currentMatch} and returns its kind.
+ * It is not a full SQL tokenizer: any character it does not recognize is returned
+ * on its own as {@link Lexem#OTHER}.
+ */
+@RequiredArgsConstructor
+class ScriptScanner {
+
+    /* Patterns are immutable and thread-safe, so they are compiled once for all scanners. */
+
+    private static final Pattern EOL = Pattern.compile("[\n\r]+");
+
+    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
+
+    private static final Pattern IDENTIFIER = Pattern.compile("[a-z][a-z0-9_]*", Pattern.CASE_INSENSITIVE);
+
+    private static final Pattern SINGLE_QUOTED_STRING = Pattern.compile("'(\\\\'|[^'])*'");
+
+    private static final Pattern ANSI_QUOTED_STRING = Pattern.compile("\"(\\\\\"|[^\"])*\"");
+
+    private static final Pattern DOLLAR_QUOTED_STRING_DELIMITER = Pattern.compile("\\$\\w*\\$");
+
+    /** Name of the script resource; only used in parse error reports. */
+    private final String resource;
+
+    /** Full text of the script being scanned. */
+    private final String script;
+
+    /** Literal text that separates statements. */
+    private final String separator;
+
+    /** Literal text that starts a single-line comment. */
+    private final String commentPrefix;
+
+    /** Literal text that opens a block comment. */
+    private final String blockCommentStartDelimiter;
+
+    /** Literal text that closes a block comment. */
+    private final String blockCommentEndDelimiter;
+
+    /** Index in {@code script} of the first character that has not been consumed yet. */
+    private int offset;
+
+    /** Text of the lexem returned by the latest {@link #next()} call. */
+    @Getter
+    private String currentMatch;
+
+    /**
+     * Tries to consume the given literal at the current offset.
+     *
+     * @param substring literal text expected at the current offset
+     * @return {@code true} if the literal was found and consumed; otherwise
+     *         {@code currentMatch} is reset to the empty string
+     */
+    private boolean matches(String substring) {
+        // An empty literal would "match" without advancing the offset and make
+        // next() return the same lexem forever, so it is never treated as a match.
+        if (!substring.isEmpty() && script.startsWith(substring, offset)) {
+            currentMatch = substring;
+            offset += substring.length();
+            return true;
+        }
+        currentMatch = "";
+        return false;
+    }
+
+    /**
+     * Tries to consume a match of the given pattern that starts exactly at the current offset.
+     *
+     * @param regexp pattern expected at the current offset
+     * @return {@code true} if the pattern matched and was consumed; otherwise
+     *         {@code currentMatch} is reset to the empty string
+     */
+    private boolean matches(Pattern regexp) {
+        Matcher m = regexp.matcher(script);
+        // Restrict the matcher to the unread tail and anchor it at its start.
+        // Unlike find(offset), a failed attempt does not search the rest of the script.
+        m.region(offset, script.length());
+        if (m.lookingAt()) {
+            currentMatch = m.group();
+            offset = m.end();
+            return true;
+        }
+        currentMatch = "";
+        return false;
+    }
+
+    /**
+     * Matches from commentPrefix to the EOL (inclusive) or to the end of the script.
+     *
+     * @return {@code true} if a single-line comment was consumed
+     */
+    private boolean matchesSingleLineComment() {
+        if (!matches(commentPrefix)) {
+            return false;
+        }
+        Matcher m = EOL.matcher(script);
+        // The line terminator, when present, is part of the comment lexem.
+        int end = m.find(offset) ? m.end() : script.length();
+        currentMatch = commentPrefix + script.substring(offset, end);
+        offset = end;
+        return true;
+    }
+
+    /**
+     * Matches from blockCommentStartDelimiter to the next blockCommentEndDelimiter (inclusive).
+     *
+     * @return {@code true} if a block comment was consumed
+     * @throws ScriptUtils.ScriptParseException if blockCommentEndDelimiter is not found
+     */
+    private boolean matchesMultilineComment() {
+        if (!matches(blockCommentStartDelimiter)) {
+            return false;
+        }
+        int end = script.indexOf(blockCommentEndDelimiter, offset);
+        if (end < 0) {
+            throw new ScriptUtils.ScriptParseException(
+                String.format("Missing block comment end delimiter [%s].", blockCommentEndDelimiter),
+                resource
+            );
+        }
+        end += blockCommentEndDelimiter.length();
+        currentMatch = blockCommentStartDelimiter + script.substring(offset, end);
+        offset = end;
+        return true;
+    }
+
+    /**
+     * Matches {@code $<tag>$ .... $<tag>$}, where the closing delimiter must be
+     * identical to the opening one.
+     *
+     * @return {@code true} if a dollar quoted string was consumed
+     * @throws ScriptUtils.ScriptParseException if the closing delimiter is not found
+     */
+    private boolean matchesDollarQuotedString() {
+        if (!matches(DOLLAR_QUOTED_STRING_DELIMITER)) {
+            return false;
+        }
+        String delimiter = currentMatch;
+        int end = script.indexOf(delimiter, offset);
+        if (end < 0) {
+            throw new ScriptUtils.ScriptParseException(
+                String.format("Unclosed dollar quoted string [%s].", delimiter),
+                resource
+            );
+        }
+        end += delimiter.length();
+        currentMatch = delimiter + script.substring(offset, end);
+        offset = end;
+        return true;
+    }
+
+    /**
+     * Consumes the next lexem of the script and makes its text available
+     * through {@code getCurrentMatch()}.
+     * <p>
+     * The order of the checks matters: the separator is tried first, then comments,
+     * quoted strings, identifiers and whitespace. Any other character is returned
+     * on its own as {@link Lexem#OTHER}.
+     *
+     * @return kind of the consumed lexem, or {@link Lexem#EOF} once the whole script is consumed
+     * @throws ScriptUtils.ScriptParseException on an unterminated block comment
+     *         or dollar quoted string
+     */
+    Lexem next() {
+        if (offset >= script.length()) {
+            return Lexem.EOF;
+        }
+        if (matches(separator)) {
+            return Lexem.SEPARATOR;
+        }
+        if (matchesSingleLineComment() || matchesMultilineComment()) {
+            return Lexem.COMMENT;
+        }
+        if (matches(SINGLE_QUOTED_STRING) || matches(ANSI_QUOTED_STRING) || matchesDollarQuotedString()) {
+            return Lexem.QUOTED_STRING;
+        }
+        if (matches(IDENTIFIER)) {
+            return Lexem.IDENTIFIER;
+        }
+        if (matches(WHITESPACE)) {
+            return Lexem.WHITESPACE;
+        }
+        // Nothing recognized: consume exactly one character so the scan always progresses.
+        currentMatch = String.valueOf(script.charAt(offset++));
+        return Lexem.OTHER;
+    }
+
+    /** Kinds of lexems produced by {@link #next()}. */
+    enum Lexem {
+        /** The statement separator. */
+        SEPARATOR,
+        /** A single-line or block comment, delimiters included. */
+        COMMENT,
+        /** A single quoted, double quoted or dollar quoted string, quotes included. */
+        QUOTED_STRING,
+        /** A run of whitespace characters. */
+        WHITESPACE,
+        /** A letter followed by letters, digits or underscores. */
+        IDENTIFIER,
+        /** A single character that fits none of the other kinds. */
+        OTHER,
+        /** The end of the script has been reached. */
+        EOF,
+    }
+}
@@ -0,0 +1,127 @@
+package org.testcontainers.ext;
+
+import lombok.RequiredArgsConstructor;
+import org.apache.commons.lang3.StringUtils;
+import org.testcontainers.ext.ScriptScanner.Lexem;
+
+import java.util.List;
+
+/**
+ * Performs splitting of an SQL script into statements including
+ * basic clean-up.
+ * <p>
+ * Lexems are pulled from the given {@link ScriptScanner}; every non-empty
+ * statement is trimmed and added to the given list.
+ */
+@RequiredArgsConstructor
+class ScriptSplitter {
+
+    /** Source of lexems. */
+    private final ScriptScanner scanner;
+
+    /** Receives the statements in the order they are found. */
+    private final List<String> statements;
+
+    /** Text of the statement currently being assembled. */
+    private final StringBuilder sb = new StringBuilder();
+
+    /**
+     * Standard parsing:
+     * 1. Replace comments by whitespace
+     * 2. Shrink whitespace and eols
+     * 3. Split on separator
+     * <p>
+     * A {@code begin} identifier switches to compound statement mode until the
+     * matching end of block is found.
+     */
+    void split() {
+        Lexem l;
+        while ((l = scanner.next()) != Lexem.EOF) {
+            switch (l) {
+                case SEPARATOR:
+                    flushStringBuilder();
+                    break;
+                case COMMENT:
+                    // A comment separates tokens just like whitespace does. A line
+                    // comment lexem also contains its EOL, so simply dropping it
+                    // would glue 'a--c\nb' into 'ab'.
+                case WHITESPACE:
+                    appendSingleSpace();
+                    break;
+                case IDENTIFIER:
+                    appendMatch();
+                    if ("begin".equalsIgnoreCase(scanner.getCurrentMatch())) {
+                        compoundStatement(false);
+                        flushStringBuilder();
+                    }
+                    break;
+                default:
+                    appendMatch();
+                    break;
+            }
+        }
+        flushStringBuilder();
+    }
+
+    /**
+     * Compound statement ('create procedure') mode:
+     * 1. Do not remove comments
+     * 2. Do not shrink whitespace
+     * 3. Do not split on separators
+     * 4. This mode can be recursive
+     *
+     * @param recursive {@code true} when this block is nested inside another compound statement
+     */
+    private void compoundStatement(boolean recursive) {
+        Lexem l;
+        while ((l = scanner.next()) != Lexem.EOF) {
+            appendMatch();
+            if (l != Lexem.IDENTIFIER) {
+                continue;
+            }
+            final String word = scanner.getCurrentMatch();
+            if ("begin".equalsIgnoreCase(word)) {
+                compoundStatement(true);
+            } else if ("end".equalsIgnoreCase(word) && endOfBlock(recursive)) {
+                return;
+            }
+        }
+        // Script ended inside the block: keep whatever has been collected so far.
+        flushStringBuilder();
+    }
+
+    /**
+     * Decides, right after an {@code end} identifier, whether the block is really finished.
+     * <p>
+     * Whitespace and comments following {@code end} are buffered. The block is finished
+     * if the next lexem is the separator or a {@code ;}, or if the script ends. Any other
+     * lexem means this {@code end} did not close the block: the buffered text and that
+     * lexem are appended to the statement.
+     *
+     * @param recursive {@code true} for a nested block, whose terminator is kept in the statement text
+     * @return {@code true} if the block is finished
+     */
+    private boolean endOfBlock(boolean recursive) {
+        final StringBuilder pending = new StringBuilder();
+        Lexem l;
+        while ((l = scanner.next()) != Lexem.EOF) {
+            switch (l) {
+                case COMMENT:
+                case WHITESPACE:
+                    pending.append(scanner.getCurrentMatch());
+                    break;
+                case SEPARATOR:
+                    //Only whitespace and comments preceded the separator: true end of block
+                    closeBlock(recursive, pending);
+                    return true;
+                default:
+                    // Semicolon is not recognized as separator: this means that a custom
+                    // separator is used. Still, 'END;' should be a valid end of block
+                    if (";".equals(scanner.getCurrentMatch())) {
+                        closeBlock(recursive, pending);
+                        return true;
+                    }
+                    sb.append(pending);
+                    appendMatch();
+                    return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Finishes a block: for an internal block the buffered text and the terminator
+     * are appended to the statement, for the outermost block they are dropped.
+     */
+    private void closeBlock(boolean recursive, StringBuilder pending) {
+        if (recursive) {
+            sb.append(pending);
+            appendMatch();
+        }
+    }
+
+    /** Appends the text of the current lexem to the statement. */
+    private void appendMatch() {
+        sb.append(scanner.getCurrentMatch());
+    }
+
+    /** Appends one space unless the statement text already ends with a space. */
+    private void appendSingleSpace() {
+        final int length = sb.length();
+        // Looks only at the last character instead of copying the whole buffer.
+        if (length == 0 || sb.charAt(length - 1) != ' ') {
+            sb.append(' ');
+        }
+    }
+
+    /** Adds the trimmed statement to the result if it is not empty, then resets the buffer. */
+    private void flushStringBuilder() {
+        final String s = sb.toString().trim();
+        if (StringUtils.isNotEmpty(s)) {
+            statements.add(s);
+        }
+        sb.setLength(0);
+    }
+}