Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,26 @@

package org.opensearch.sql.common.antlr;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.misc.IntervalSet;

/**
* Syntax analysis error listener that handles any syntax error by throwing exception with useful
* information.
*/
public class SyntaxAnalysisErrorListener extends BaseErrorListener {
// Show up to this many characters before the offending token in the query.
private static final int CONTEXT_TRUNCATION_THRESHOLD = 20;
// Avoid presenting too many alternatives when many are available.
private static final int SUGGESTION_TRUNCATION_THRESHOLD = 5;

@Override
public void syntaxError(
Expand All @@ -35,8 +42,7 @@ public void syntaxError(
throw new SyntaxCheckException(
String.format(
Locale.ROOT,
"Failed to parse query due to offending symbol [%s] "
+ "at: '%s' <--- HERE... More details: %s",
"[%s] is not a valid term at this part of the query: '%s' <-- HERE. %s",
getOffendingText(offendingToken),
truncateQueryAtOffendingToken(query, offendingToken),
getDetails(recognizer, msg, e)));
Expand All @@ -47,21 +53,47 @@ private String getOffendingText(Token offendingToken) {
}

private String truncateQueryAtOffendingToken(String query, Token offendingToken) {
return query.substring(0, offendingToken.getStopIndex() + 1);
int contextStartIndex = offendingToken.getStartIndex() - CONTEXT_TRUNCATION_THRESHOLD;
if (contextStartIndex < 3) { // The ellipses won't save us anything below the first 4 characters
return query.substring(0, offendingToken.getStopIndex() + 1);
}
return "..." + query.substring(contextStartIndex, offendingToken.getStopIndex() + 1);
}

private List<String> topSuggestions(Recognizer<?, ?> recognizer, IntervalSet continuations) {
Vocabulary vocab = recognizer.getVocabulary();
List<String> tokenNames = new ArrayList<>(SUGGESTION_TRUNCATION_THRESHOLD);
for (int tokenType :
continuations
.toList()
.subList(0, Math.min(continuations.size(), SUGGESTION_TRUNCATION_THRESHOLD))) {
tokenNames.add(vocab.getDisplayName(tokenType));
}
return tokenNames;
}

/**
* As official JavaDoc says, e=null means parser was able to recover from the error. In other
* words, "msg" argument includes the information we want.
*/
private String getDetails(Recognizer<?, ?> recognizer, String msg, RecognitionException e) {
String details;
if (e == null) {
details = msg;
private String getDetails(Recognizer<?, ?> recognizer, String msg, RecognitionException ex) {
if (ex == null) {
// According to the ANTLR docs, ex == null means the parser was able to recover from the
// error.
// In such cases, `msg` includes the raw error information we care about.
return msg;
}

IntervalSet possibleContinuations = ex.getExpectedTokens();
List<String> suggestions = topSuggestions(recognizer, possibleContinuations);

StringBuilder details = new StringBuilder("Expecting ");
if (possibleContinuations.size() > SUGGESTION_TRUNCATION_THRESHOLD) {
details
.append("one of ")
.append(suggestions.size())
.append(" possible tokens. Some examples: ")
.append(String.join(", ", suggestions))
.append(", ...");
} else {
IntervalSet followSet = e.getExpectedTokens();
details = "Expecting tokens in " + followSet.toString(recognizer.getVocabulary());
details.append("tokens: ").append(String.join(", ", suggestions));
}
return details;
return details.toString();
}
}
16 changes: 8 additions & 8 deletions docs/user/dql/troubleshooting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,22 @@ Query:

.. code-block:: JSON

POST /_plugins/_sql
POST /_plugins/_ppl
{
"query" : "SELECT * FROM sample:data"
"query" : "SOURCE = test_index | where a > 0)"
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This SQL error no longer outputs the same error message (new parsing engine?). I couldn't hit the ANTLR exception with a new SQL query, so I updated it to a PPL one.

}

Result:

.. code-block:: JSON

{
"reason": "Invalid SQL query",
"details": "Failed to parse query due to offending symbol [:] at: 'SELECT * FROM xxx WHERE xxx:' <--- HERE...
More details: Expecting tokens in {<EOF>, 'AND', 'BETWEEN', 'GROUP', 'HAVING', 'IN', 'IS', 'LIKE', 'LIMIT',
'NOT', 'OR', 'ORDER', 'REGEXP', '*', '/', '%', '+', '-', 'DIV', 'MOD', '=', '>', '<', '!',
'|', '&', '^', '.', DOT_ID}",
"type": "SyntaxAnalysisException"
"error": {
"reason": "Invalid Query",
"details": "[)] is not a valid term at this part of the query: '..._index | where a > 0)' <-- HERE. extraneous input ')' expecting <EOF>",
"type": "SyntaxCheckException"
},
"status": 400
}

**Workaround**
Expand Down
Loading