Skip to content

Commit f675478

Browse files
authored
Added MS SQL support for the dataframe-jdbc module (#689)
* Add support for MS SQL database type and associated tests Added support for MS SQL database type in the util.kt file and created a new file for MS SQL configuration. Additionally, implemented test cases for new support in mssqlTest.kt. * Add sqlQueryLimitOne method to DbType and update test cases Implemented sqlQueryLimitOne method in DbType companion object. This method generates a SQL query that selects one record from a given table. Also, updated the unit tests to accommodate these modifications. * Refactor SQL query limit implementation across databases The SQL query limit behavior has been updated to use a unified method, `sqlQueryLimit`, on different database types, instead of hardcoding this limitation. This ensures a consistent application of these limits across different databases. Also added `TODO` comments to address the nullability issues and points to be checked like filtering system tables and special behavior with catalogues in MSSQL in the future. * Refactored and improved SQL query limit and nullability handling * Ignore MSSQLTest class in test execution * Add MSSQL support and clean up code This commit adds Microsoft SQL Server (MSSQL) library to the dataframe-jdbc project's dependencies. Also, system table filtering has been specifically implemented for MSSQL by adjusting the isSystemTable method. This is a significant improvement over the previous assumption that all DBMS are similar to MySql. * Ignore MSSQLTest class in unit tests * Refactor indentation in Kotlin files * Update comments and fix formatting in MsSql.kt and build.gradle.kts * Refactor code to simplify SQL query construction
1 parent efdbadb commit f675478

File tree

8 files changed

+500
-14
lines changed

8 files changed

+500
-14
lines changed

dataframe-jdbc/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ dependencies {
2525
testImplementation(libs.postgresql)
2626
testImplementation(libs.mysql)
2727
testImplementation(libs.h2db)
28+
testImplementation(libs.mssql)
2829
testImplementation(libs.junit)
2930
testImplementation(libs.sl4j)
3031
testImplementation(libs.kotestAssertions) {

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,14 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
5050
* @return The corresponding Kotlin data type, or null if no mapping is found.
5151
*/
5252
public abstract fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType?
53+
54+
/**
 * Appends a `LIMIT` clause to the given SQL query.
 *
 * This default implementation concatenates ` LIMIT <limit>` to the end of [sqlQuery].
 * The function is `open`, so a database type may override it with its own syntax.
 *
 * @param sqlQuery The original SQL query.
 * @param limit The maximum number of rows to retrieve from the query. Default is 1.
 * @return [sqlQuery] followed by the `LIMIT` clause.
 */
public open fun sqlQueryLimit(sqlQuery: String, limit: Int = 1): String {
    val limitClause = "LIMIT $limit"
    return "$sqlQuery $limitClause"
}
5363
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
package org.jetbrains.kotlinx.dataframe.io.db
2+
3+
import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
4+
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
5+
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
6+
import java.sql.ResultSet
7+
import java.util.*
8+
import kotlin.reflect.KType
9+
import kotlin.reflect.full.createType
10+
11+
/**
 * Represents the MSSQL (Microsoft SQL Server) database type.
 *
 * Supplies the JDBC driver class name, a heuristic for filtering system tables,
 * table metadata extraction and a `TOP`-based row limit (T-SQL has no `LIMIT` clause).
 * No MSSQL-specific type mapping is defined here: both conversion methods return `null`.
 */
public object MsSql : DbType("sqlserver") {
    override val driverClassName: String
        get() = "com.microsoft.sqlserver.jdbc.SQLServerDriver"

    /**
     * Returns `null`: no MSSQL-specific column schema mapping is defined.
     *
     * NOTE(review): presumably callers fall back to a default mapping on `null` — confirm.
     */
    override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? = null

    /**
     * Heuristically decides whether [tableMetadata] describes an MSSQL system table.
     *
     * A table is treated as a system table when its schema name contains `sys` or
     * `information_schema`, its name starts with `sys` or `dt` (case-sensitive) or contains
     * `sys_config`, or its catalogue name contains `system`, `master`, `model`, `msdb` or `tempdb`.
     * Apart from the two prefix checks, matching is case-insensitive.
     *
     * @param tableMetadata The table to inspect.
     * @return `true` if any of the system-table markers above matches.
     */
    override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
        // Locale-independent comparison: lower-casing with the default locale breaks
        // "INFORMATION_SCHEMA"/"SYS_CONFIG" under Turkish/Azeri locales (dotless i).
        // A null receiver never matches.
        fun String?.containsIgnoringCase(substr: String) = this?.contains(substr, ignoreCase = true) == true

        val schemaName = tableMetadata.schemaName
        val tableName = tableMetadata.name
        val catalogName = tableMetadata.catalogue

        return schemaName.containsIgnoringCase("sys") ||
            schemaName.containsIgnoringCase("information_schema") ||
            tableName.startsWith("sys") ||
            tableName.startsWith("dt") ||
            tableName.containsIgnoringCase("sys_config") ||
            catalogName.containsIgnoringCase("system") ||
            catalogName.containsIgnoringCase("master") ||
            catalogName.containsIgnoringCase("model") ||
            catalogName.containsIgnoringCase("msdb") ||
            catalogName.containsIgnoringCase("tempdb")
    }

    /**
     * Builds a [TableMetadata] from the current row of [tables].
     *
     * Reads the `table_name`, `table_schem` and `table_cat` columns, in that order.
     *
     * @param tables A result set positioned on a row that exposes those columns.
     */
    override fun buildTableMetadata(tables: ResultSet): TableMetadata =
        TableMetadata(
            tables.getString("table_name"),
            tables.getString("table_schem"),
            tables.getString("table_cat"),
        )

    /**
     * Returns `null`: no MSSQL-specific SQL-to-Kotlin type mapping is defined.
     */
    override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? = null

    /**
     * Restricts [sqlQuery] to at most [limit] rows using the T-SQL `TOP` clause.
     *
     * Only the first `SELECT` keyword (matched case-insensitively) is rewritten to
     * `SELECT TOP <limit>`, so nested subqueries are left untouched. A query without
     * any `SELECT` is returned unchanged.
     *
     * NOTE(review): queries starting with `SELECT DISTINCT` or with a CTE (`WITH ...`)
     * are not handled specially and may need dedicated support.
     *
     * @param sqlQuery The original SQL query.
     * @param limit The maximum number of rows to retrieve from the query.
     * @return The query with the `TOP` clause inserted.
     */
    public override fun sqlQueryLimit(sqlQuery: String, limit: Int): String =
        // String.replace* returns a new string; the result has to be returned, not discarded.
        sqlQuery.replaceFirst("SELECT", "SELECT TOP $limit", ignoreCase = true)
}

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/util.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@ public fun extractDBTypeFromUrl(url: String?): DbType {
1717
MySql.dbTypeInJdbcUrl in url -> MySql
1818
Sqlite.dbTypeInJdbcUrl in url -> Sqlite
1919
PostgreSql.dbTypeInJdbcUrl in url -> PostgreSql
20+
MsSql.dbTypeInJdbcUrl in url -> MsSql
2021
else -> throw IllegalArgumentException(
2122
"Unsupported database type in the url: $url. " +
22-
"Only H2, MariaDB, MySQL, SQLite and PostgreSQL are supported!"
23+
"Only H2, MariaDB, MySQL, MSSQL, SQLite and PostgreSQL are supported!"
2324
)
2425
}
2526
} else {

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -137,17 +137,17 @@ public fun DataFrame.Companion.readSqlTable(
137137
limit: Int = DEFAULT_LIMIT,
138138
inferNullability: Boolean = true,
139139
): AnyFrame {
140-
var preparedQuery = "SELECT * FROM $tableName"
141-
if (limit > 0) preparedQuery += " LIMIT $limit"
142-
143140
val url = connection.metaData.url
144141
val dbType = extractDBTypeFromUrl(url)
145142

143+
val selectAllQuery = if (limit > 0) dbType.sqlQueryLimit("SELECT * FROM $tableName", limit)
144+
else "SELECT * FROM $tableName"
145+
146146
connection.createStatement().use { st ->
147147
logger.debug { "Connection with url:$url is established successfully." }
148148

149149
st.executeQuery(
150-
preparedQuery
150+
selectAllQuery
151151
).use { rs ->
152152
val tableColumns = getTableColumnsMetadata(rs)
153153
return fetchAndConvertDataFromResultSet(tableColumns, rs, dbType, limit, inferNullability)
@@ -206,8 +206,7 @@ public fun DataFrame.Companion.readSqlQuery(
206206
val url = connection.metaData.url
207207
val dbType = extractDBTypeFromUrl(url)
208208

209-
var internalSqlQuery = sqlQuery
210-
if (limit > 0) internalSqlQuery += " LIMIT $limit"
209+
val internalSqlQuery = if (limit > 0) dbType.sqlQueryLimit(sqlQuery, limit) else sqlQuery
211210

212211
logger.debug { "Executing SQL query: $internalSqlQuery" }
213212

@@ -340,9 +339,11 @@ public fun DataFrame.Companion.readAllSqlTables(
340339
val table = dbType.buildTableMetadata(tables)
341340
if (!dbType.isSystemTable(table)) {
342341
// we filter here a second time because of specific logic with SQLite and possible issues with future databases
343-
// val tableName = if (table.catalogue != null) table.catalogue + "." + table.name else table.name
344-
val tableName = if (catalogue != null) catalogue + "." + table.name else table.name
345-
342+
val tableName = when {
343+
catalogue != null && table.schemaName != null -> "$catalogue.${table.schemaName}.${table.name}"
344+
catalogue != null && table.schemaName == null -> "$catalogue.${table.name}"
345+
else -> table.name
346+
}
346347
// TODO: handle both cases: schema specified in the URL or not
347348
// in h2 database name is recognized as a schema name https://www.h2database.com/html/features.html#database_url
348349
// https://stackoverflow.com/questions/20896935/spring-hibernate-h2-database-schema-not-found
@@ -390,11 +391,12 @@ public fun DataFrame.Companion.getSchemaForSqlTable(
390391
val url = connection.metaData.url
391392
val dbType = extractDBTypeFromUrl(url)
392393

393-
val preparedQuery = "SELECT * FROM $tableName LIMIT 1"
394+
val sqlQuery = "SELECT * FROM $tableName"
395+
val selectFirstRowQuery = dbType.sqlQueryLimit(sqlQuery, limit = 1)
394396

395397
connection.createStatement().use { st ->
396398
st.executeQuery(
397-
preparedQuery
399+
selectFirstRowQuery
398400
).use { rs ->
399401
val tableColumns = getTableColumnsMetadata(rs)
400402
return buildSchemaByTableColumns(tableColumns, dbType)
@@ -555,15 +557,19 @@ private fun getTableColumnsMetadata(rs: ResultSet): MutableList<TableColumnMetad
555557
val schema: String? = rs.statement.connection.schema.takeUnless { it.isNullOrBlank() }
556558

557559
for (i in 1 until numberOfColumns + 1) {
560+
val tableName = metaData.getTableName(i)
561+
val columnName = metaData.getColumnName(i)
562+
563+
// this algorithm works correctly only for SQL Table and ResultSet opened on one SQL table
558564
val columnResultSet: ResultSet =
559-
databaseMetaData.getColumns(catalog, schema, metaData.getTableName(i), metaData.getColumnName(i))
565+
databaseMetaData.getColumns(catalog, schema, tableName, columnName)
560566
val isNullable = if (columnResultSet.next()) {
561567
columnResultSet.getString("IS_NULLABLE") == "YES"
562568
} else {
563569
true // we assume that it's nullable by default
564570
}
565571

566-
val name = manageColumnNameDuplication(columnNameCounter, metaData.getColumnName(i))
572+
val name = manageColumnNameDuplication(columnNameCounter, columnName)
567573
val size = metaData.getColumnDisplaySize(i)
568574
val type = metaData.getColumnTypeName(i)
569575
val jdbcType = metaData.getColumnType(i)

dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2Test.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@ class JdbcTest {
633633

634634
val saleDataSchema = dataSchemas[1]
635635
saleDataSchema.columns.size shouldBe 3
636+
// TODO: fix nullability
636637
saleDataSchema.columns["amount"]!!.type shouldBe typeOf<BigDecimal>()
637638

638639
val dbConfig = DatabaseConfiguration(url = URL)
@@ -675,6 +676,8 @@ class JdbcTest {
675676
saleDataSchema1.columns["amount"]!!.type shouldBe typeOf<BigDecimal>()
676677
}
677678

679+
// TODO: add the same test for each particular database and refactor the scenario to the common test case
680+
// https://github.com/Kotlin/dataframe/issues/688
678681
@Test
679682
fun `infer nullability`() {
680683
// prepare tables and data

0 commit comments

Comments
 (0)