Skip to content

Commit ecf48c5

Browse files
authored
Improve SQL validation logic and add related test (#1324)
* Improve SQL validation logic and add related test Enhanced `containsForbiddenPatterns` to use regex for stricter checks on SQL keywords and patterns, disallowing forbidden elements like comments and multiple statements. Updated `isValidSqlQuery` documentation for clarity. Added a test to validate reading from a table with column names containing reserved SQL keywords. * Lint * Refactoring
1 parent 09f1bcc commit ecf48c5

File tree

2 files changed

+81
-27
lines changed
  • dataframe-jdbc/src
    • main/kotlin/org/jetbrains/kotlinx/dataframe/io
    • test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2

2 files changed

+81
-27
lines changed

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt

Lines changed: 60 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -370,35 +370,68 @@ public fun Connection.readDataFrame(
370370
)
371371
}
372372

373+
private val FORBIDDEN_PATTERNS_REGEX = listOf(
374+
";", // Separator for SQL statements
375+
"--", // Single-line comments
376+
"/\\*", // Start of multi-line comments
377+
"\\*/", // End of multi-line comments
378+
"\\bDROP\\b", // DROP as a full word
379+
"\\bDELETE\\b", // DELETE as a full word
380+
"\\bINSERT\\b", // INSERT as a full word
381+
"\\bUPDATE\\b", // UPDATE as a full word
382+
"\\bEXEC\\b", // EXEC as a full word
383+
"\\bEXECUTE\\b", // EXECUTE as a full word
384+
"\\bCREATE\\b", // CREATE as a full word
385+
"\\bALTER\\b", // ALTER as a full word
386+
"\\bGRANT\\b", // GRANT as a full word
387+
"\\bREVOKE\\b", // REVOKE as a full word
388+
"\\bMERGE\\b", // MERGE as a full word
389+
).map { Regex(it, RegexOption.IGNORE_CASE) }
390+
373391
/**
374392
* Checks if a given string contains forbidden patterns or keywords.
375393
* Logs a clear and friendly message if any forbidden pattern is found.
394+
*
395+
* ### Forbidden SQL Examples:
396+
* 1. **Single-line comment** (using `--`):
397+
* - `SELECT * FROM Sale WHERE amount = 100.0 -- AND id = 5`
398+
*
399+
* 2. **Multi-line comment** (using `/* */`):
400+
* - `SELECT * FROM Customer /* Possible malicious comment */ WHERE id = 1`
401+
*
402+
* 3. **Multiple statements separated by semicolon (`;`)**:
403+
* - `SELECT * FROM Sale WHERE amount = 500.0; DROP TABLE Customer`
404+
*
405+
* 4. **Potentially malicious SQL with single quotes for injection**:
406+
* - `SELECT * FROM Sale WHERE id = 1 AND amount = 100.0 OR '1'='1`
407+
*
408+
* 5. **Usage of dangerous commands like `DROP`, `DELETE`, `ALTER`, etc.**:
409+
* - `DROP TABLE Customer; SELECT * FROM Sale`
410+
*
411+
* ### Allowed SQL Examples:
412+
* 1. Query with names containing reserved words as parts of identifiers:
413+
* - `SELECT last_update FROM HELLO_ALTER`
414+
*
415+
* 2. Query with fully valid syntax:
416+
* - `SELECT id, name FROM Customers WHERE age > 25`
417+
*
418+
* 3. Query with identifiers resembling commands but not in forbidden contexts:
419+
* - `SELECT id, amount FROM TRANSACTION_DROP`
420+
*
421+
* 4. Query with case-insensitive identifiers:
422+
* - `select Id, Name from Hello_Table`
423+
*
424+
* ### Key Notes:
425+
* - Reserved keywords like `DROP`, `DELETE`, `ALTER`, etc., are forbidden **only when they appear as standalone commands**.
426+
* - Reserved words as parts of table or column names (e.g., `HELLO_ALTER`, `myDropTable`) **are allowed**.
427+
* - Inline or multi-line comments (`--` or `/* */`) are restricted to prevent potential SQL injection attacks.
428+
* - Multiple SQL statements separated by semicolons (`;`) are not allowed to prevent the execution of unintended commands.
376429
*/
377-
private fun containsForbiddenPatterns(input: String): Boolean {
378-
// List of forbidden patterns or commands
379-
val forbiddenPatterns = listOf(
380-
";", // Separator for SQL statements
381-
"--", // Single-line comments
382-
"/*", // Start of multi-line comments
383-
"*/", // End of multi-line comments
384-
"DROP",
385-
"DELETE",
386-
"INSERT",
387-
"UPDATE",
388-
"EXEC",
389-
"EXECUTE",
390-
"CREATE",
391-
"ALTER",
392-
"GRANT",
393-
"REVOKE",
394-
"MERGE",
395-
)
396-
397-
for (pattern in forbiddenPatterns) {
398-
if (input.contains(pattern)) {
430+
private fun hasForbiddenPatterns(input: String): Boolean {
431+
for (regex in FORBIDDEN_PATTERNS_REGEX) {
432+
if (regex.containsMatchIn(input)) {
399433
logger.error {
400-
"Validation failed: The input contains a forbidden element '$pattern'. " +
401-
"Please review the input: '$input'."
434+
"Validation failed: The input contains a forbidden element matching '${regex.pattern}'. Please review the input: '$input'."
402435
}
403436
return true
404437
}
@@ -408,7 +441,7 @@ private fun containsForbiddenPatterns(input: String): Boolean {
408441

409442
/**
410443
* Validates if the SQL query is safe and starts with SELECT.
411-
* Ensures proper syntax structure, checks for balanced quotes, and disallows dangerous commands or patterns.
444+
* Ensures a proper syntax structure, checks for balanced quotes, and disallows dangerous commands or patterns.
412445
*/
413446
private fun isValidSqlQuery(sqlQuery: String): Boolean {
414447
val normalizedSqlQuery = sqlQuery.trim().uppercase()
@@ -423,7 +456,7 @@ private fun isValidSqlQuery(sqlQuery: String): Boolean {
423456
}
424457

425458
// Validate against forbidden patterns
426-
if (containsForbiddenPatterns(normalizedSqlQuery)) {
459+
if (hasForbiddenPatterns(normalizedSqlQuery)) {
427460
return false
428461
}
429462

@@ -459,7 +492,7 @@ private fun isValidTableName(tableName: String): Boolean {
459492
logger.warn { "Validating SQL table name: '$tableName'" }
460493

461494
// Validate against forbidden patterns
462-
if (containsForbiddenPatterns(normalizedTableName)) {
495+
if (hasForbiddenPatterns(normalizedTableName)) {
463496
return false
464497
}
465498

dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,27 @@ class JdbcTest {
792792
}
793793
}
794794

795+
@Test
796+
fun `read from table with column name containing the reserved SQL keywords`() {
797+
@Language("SQL")
798+
val createAlterTableQuery = """
799+
CREATE TABLE HELLO_ALTER (
800+
id INT PRIMARY KEY,
801+
last_update TEXT
802+
)
803+
"""
804+
805+
@Language("SQL")
806+
val selectFromWeirdTableSQL = """SELECT last_update from HELLO_ALTER"""
807+
808+
try {
809+
connection.createStatement().execute(createAlterTableQuery)
810+
DataFrame.readSqlQuery(connection, selectFromWeirdTableSQL).rowsCount() shouldBe 0
811+
} finally {
812+
connection.createStatement().execute("DROP TABLE IF EXISTS HELLO_ALTER")
813+
}
814+
}
815+
795816
@Test
796817
fun `read from non-existing jdbc url`() {
797818
shouldThrow<SQLException> {

0 commit comments

Comments
 (0)