From 62fffe25c41ee4daad9bf94605a1028c061b9cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Reynard?= Date: Fri, 14 Nov 2025 16:48:18 +0100 Subject: [PATCH] MIME type validation for dataset parts with type DB - Updated `validateFile` method to handle DB-specific MIME type checks. - Enhanced `DatasetServiceImpl` logic to differentiate validation for DB and non-DB dataset parts. - Added integration tests for invalid MIME types for DB dataset parts in `createDatasetPart`, `updateDataset`, and `replaceDatasetPart`. --- .../service/DatasetServiceIntegrationTest.kt | 224 ++++++++++++++++++ .../integrationTest/resources/customers.zip | Bin 0 -> 961 bytes .../dataset/service/DatasetServiceImpl.kt | 34 ++- 3 files changed, 248 insertions(+), 10 deletions(-) create mode 100644 dataset/src/integrationTest/resources/customers.zip diff --git a/dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt b/dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt index 48ba4e501..56d044f86 100644 --- a/dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt +++ b/dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt @@ -89,6 +89,7 @@ class DatasetServiceIntegrationTest() : CsmTestBase() { val CONNECTED_DEFAULT_USER = "test.user@cosmotech.com" val EMPTY_SOURCE_FILE_NAME = "emptyfile.csv" val CUSTOMER_SOURCE_FILE_NAME = "customers.csv" + val CUSTOMER_ZIPPED_SOURCE_FILE_NAME = "customers.zip" val CUSTOMER_50K_SOURCE_FILE_NAME = "customers_50K.csv" val CUSTOMERS_WITH_QUOTES_SOURCE_FILE_NAME = "customerswithquotes.csv" val CUSTOMERS_WITH_DOUBLE_QUOTES_SOURCE_FILE_NAME = "customerswithdoublequotes.csv" @@ -1567,6 +1568,48 @@ class DatasetServiceIntegrationTest() : CsmTestBase() { exception.message) } + @Test + fun `test createDatasetPart DB with mimetype unsupported`() { + + val datasetCreateRequest = DatasetCreateRequest(name = "Dataset Test") + + val createDataset = + datasetApiService.createDataset( + organizationSaved.id, workspaceSaved.id, datasetCreateRequest, arrayOf()) + + assertTrue(createDataset.parts.isEmpty()) + + val resourceTestFile = + resourceLoader.getResource("classpath:/$CUSTOMER_ZIPPED_SOURCE_FILE_NAME").file + + val fileToSend = FileInputStream(resourceTestFile) + + val mockMultipartFile = + MockMultipartFile( + "file", + CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + MediaType.MULTIPART_FORM_DATA_VALUE, + IOUtils.toByteArray(fileToSend)) + + val exception = + assertThrows { + datasetApiService.createDatasetPart( + organizationSaved.id, + workspaceSaved.id, + createDataset.id, + mockMultipartFile, + DatasetPartCreateRequest( + name = "Customer list", + sourceName = CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + description = "List of customers", + tags = mutableListOf("part", "public", "customers"), + type = DatasetPartTypeEnum.File)) + } + assertEquals( + "MIME type application/zip for file $CUSTOMER_ZIPPED_SOURCE_FILE_NAME is not authorized.", + exception.message) + } + @Test fun `test createDatasetPart with unallowed file name`() { @@ -2386,6 +2429,116 @@ class DatasetServiceIntegrationTest() : CsmTestBase() { constructFilePathForDatasetPart(updatedDataset, 0))) } + @Test + fun `test updateDataset with DB dataset part and mimetype unsupported`() { + + // Create a Dataset with dataset Part + val datasetPartName = "Customers list" + val datasetPartDescription = "List of customers" + val datasetPartTags = mutableListOf("part", "public", "customers") + val datasetPartAdditionalData = + mutableMapOf("part" to "data", "complex" to mutableMapOf("nested" to "data")) + val datasetPartCreateRequest = + DatasetPartCreateRequest( + name = datasetPartName, + sourceName = CUSTOMER_SOURCE_FILE_NAME, + description = datasetPartDescription, + tags = datasetPartTags, + additionalData = datasetPartAdditionalData, + type = DatasetPartTypeEnum.File) + + val datasetName = "Customer Dataset" + val datasetDescription = "Dataset for customers" + val datasetTags = mutableListOf("dataset", "public", "customers") + val datasetAdditionalData = + mutableMapOf("dataset" to "data", "complex" to mutableMapOf("nested" to "data")) + val datasetCreateRequest = + DatasetCreateRequest( + name = datasetName, + description = datasetDescription, + tags = datasetTags, + additionalData = datasetAdditionalData, + parts = mutableListOf(datasetPartCreateRequest)) + + val resourceTestFile = resourceLoader.getResource("classpath:/$CUSTOMER_SOURCE_FILE_NAME").file + + val fileToSend = FileInputStream(resourceTestFile) + + val mockMultipartFile = + MockMultipartFile( + "files", + CUSTOMER_SOURCE_FILE_NAME, + MediaType.MULTIPART_FORM_DATA_VALUE, + IOUtils.toByteArray(fileToSend)) + + val createdDataset = + datasetApiService.createDataset( + organizationSaved.id, + workspaceSaved.id, + datasetCreateRequest, + arrayOf(mockMultipartFile)) + + // Create a DatasetUpdateRequest with new dataset part + val newDatasetPartName = "Product list" + val newDatasetPartDescription = "List of Product" + val newDatasetPartTags = mutableListOf("part", "public", "product") + val newDatasetPartAdditionalData = mutableMapOf("part" to "new data") + val newDatasetPartCreateRequest = + DatasetPartCreateRequest( + name = newDatasetPartName, + sourceName = CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + description = newDatasetPartDescription, + tags = newDatasetPartTags, + additionalData = newDatasetPartAdditionalData, + type = DatasetPartTypeEnum.DB) + + val newDatasetName = "Shop Dataset" + val newDatasetDescription = "Dataset for shop" + val newDatasetTags = mutableListOf("dataset", "public", "shop") + val newDatasetAdditionalData = mutableMapOf("dataset" to "new data") + val newDatasetSecurity = + DatasetSecurity( + default = ROLE_NONE, + accessControlList = + mutableListOf( + DatasetAccessControl(CONNECTED_ADMIN_USER, ROLE_ADMIN), + DatasetAccessControl(CONNECTED_DEFAULT_USER, ROLE_EDITOR))) + val datasetUpdateRequest = + DatasetUpdateRequest( + name = newDatasetName, + description = newDatasetDescription, + tags = newDatasetTags, + additionalData = newDatasetAdditionalData, + parts = mutableListOf(newDatasetPartCreateRequest), + security = newDatasetSecurity) + + val newDatasetPartTestFile = + resourceLoader.getResource("classpath:/$CUSTOMER_ZIPPED_SOURCE_FILE_NAME").file + + val newDatasetPartFileToSend = FileInputStream(newDatasetPartTestFile) + + val newDatasetPartMockMultipartFile = + MockMultipartFile( + "files", + CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + MediaType.MULTIPART_FORM_DATA_VALUE, + IOUtils.toByteArray(newDatasetPartFileToSend)) + + val exception = + assertThrows { + datasetApiService.updateDataset( + organizationSaved.id, + workspaceSaved.id, + createdDataset.id, + datasetUpdateRequest, + arrayOf(newDatasetPartMockMultipartFile)) + } + + assertEquals( + "MIME type application/zip for file $CUSTOMER_ZIPPED_SOURCE_FILE_NAME is not authorized.", + exception.message) + } + @Test fun `test updateDataset with empty body`() { @@ -3033,6 +3186,77 @@ class DatasetServiceIntegrationTest() : CsmTestBase() { exception.message) } + @Test + fun `test replaceDatasetPart with DB dataset part with mimetype unsupported`() { + + // Create a Dataset with dataset Part + val datasetPartCreateRequest = + DatasetPartCreateRequest( + name = "Customers list", + sourceName = CUSTOMER_SOURCE_FILE_NAME, + description = "List of customers", + tags = mutableListOf("part", "public", "customers"), + type = DatasetPartTypeEnum.DB) + + val datasetCreateRequest = + DatasetCreateRequest( + name = "Customer Dataset", + description = "Dataset for customers", + tags = mutableListOf("dataset", "public", "customers"), + parts = mutableListOf(datasetPartCreateRequest)) + + val resourceTestFile = resourceLoader.getResource("classpath:/$CUSTOMER_SOURCE_FILE_NAME").file + + val fileToSend = FileInputStream(resourceTestFile) + + val mockMultipartFile = + MockMultipartFile( + "files", + CUSTOMER_SOURCE_FILE_NAME, + MediaType.MULTIPART_FORM_DATA_VALUE, + IOUtils.toByteArray(fileToSend)) + + val createdDataset = + datasetApiService.createDataset( + organizationSaved.id, + workspaceSaved.id, + datasetCreateRequest, + arrayOf(mockMultipartFile)) + + // Create a DatasetUpdateRequest with new dataset part + val datasetPartUpdateRequest = + DatasetPartUpdateRequest( + sourceName = CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + description = "Dataset for shop", + tags = mutableListOf("dataset", "public", "shop")) + + val wrongTypeTestFile = + resourceLoader.getResource("classpath:/$CUSTOMER_ZIPPED_SOURCE_FILE_NAME").file + + val wrongTypeFileToSend = FileInputStream(wrongTypeTestFile) + + val wrongTypeMockMultipartFile = + MockMultipartFile( + "files", + CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + MediaType.MULTIPART_FORM_DATA_VALUE, + IOUtils.toByteArray(wrongTypeFileToSend)) + + val exception = + assertThrows { + datasetApiService.replaceDatasetPart( + organizationSaved.id, + workspaceSaved.id, + createdDataset.id, + createdDataset.parts[0].id, + wrongTypeMockMultipartFile, + datasetPartUpdateRequest) + } + assertEquals( + "MIME type application/zip for file $CUSTOMER_ZIPPED_SOURCE_FILE_NAME is not authorized.", + exception.message) + } + @Test fun `test replaceDatasetPart with File dataset part with unallowed file name`() { diff --git a/dataset/src/integrationTest/resources/customers.zip b/dataset/src/integrationTest/resources/customers.zip new file mode 100644 index 0000000000000000000000000000000000000000..68aab99fb300c6c1f6489c42476ce19025b30dbe GIT binary patch literal 961 zcmWIWW@Zs#-~hrq)mhODP*Bgpz`)C(z>r*8T#}!gT2!o;TwE3!!pp#}_+2nljz>Hb zM3+`@GcdBeWM*Ih6TXvui*8#8w0^f_<-4$k>A-(gSMM(4U(&&SYMYx+_O);@g(zr5 z7caIv9+MNb<;^L+HCf+YfB*Hm>_xNn!upzqYje+?iVJ_$@@1Rj)`#D}hp{&=*mW-X zzVqtCqBifgZZ0UR4OmuocWv2*gYge*3Z`{^|FN_#Z`&O188aB;FZ|ZNomAa$>Vfp^ zYM0-yV%Wcy@hS;U%H866jU{6B;_G+i7Oq>+bJ^!Waj@g8WfLb9PFW(!e(?QSRhyUG zzr?r5M(^1Bt~syz#;VnY^K|Ea-_`zAaxa%j?s>5lRvKR0itJ<}N*yJhzYw_@{g*+J z#ozwwDW(2RAzTw)#_G1EZMY(Ivq$ay!`YwbZpvB6exu?#pPYXNbF`NCH(QUBk+l-N z_Lf3<=i+Yv)!6oz`_mNef0}Bn{ht>3DEhri;xswFdS~jUquUqoe%lZdr@igqgS)Pd z&b?LN7T9h(Sh=7~ernFb3|8$LTkZOmn@o>paQ|E>+HIkgdPq_CeU#>vxcOD-k&n+9 zq`AKJXO=SCtMO~a^6V4EW`XWsa+369-)J-%ALaPWl_d8+ZRv!RcDV^6-@i$o36W3x z)f?(u@ZrX4aTAkghd(^ZaOC~fo%hjryUV7XpS~=ay31IH)9L^H{ihDhH?8UYpZ3X} z&-7{;SI|TkNpV5RClhJ|CNlrMzc97g=V z^1(~2o7`{T3%a;AY3kbx{V6hr0TQM6XB@P2dRtCu|gtqrfledD%1p1Jv@^tL0MU-c$U*}HmkZ{UB8 zibH*;czwc)D}HH3e9PrNUmsA$@}uIw9vz1%@&DKZyxBRv3OP^v%EZ6`$~6Jrj7%cT kh#ZA12g*?}u%r>hA}N0bc(byBG%*4pD5nMhGads202;KST>t<8 literal 0 HcmV?d00001 diff --git a/dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt b/dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt index 785d3dac2..e4e0f3df0 100644 --- a/dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt +++ b/dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt @@ -847,7 +847,6 @@ class DatasetServiceImpl( datasetPartUpdateRequest: DatasetPartUpdateRequest? ): DatasetPart { val dataset = getVerifiedDataset(organizationId, workspaceId, datasetId, PERMISSION_WRITE) - validateFile(file) val datasetPart = datasetPartRepository .findBy(organizationId, workspaceId, datasetId, datasetPartId) @@ -857,6 +856,7 @@ class DatasetServiceImpl( "workspace $workspaceId and dataset $datasetId") } + validateFile(datasetPart.type == DatasetPartTypeEnum.DB, file) val now = Instant.now().toEpochMilli() val userId = getCurrentAccountIdentifier(csmPlatformProperties) val editInfo = DatasetEditInfo(timestamp = now, userId = userId) @@ -977,21 +977,25 @@ class DatasetServiceImpl( "You must upload a file with the same name as the Dataset Part sourceName. " + "You provided ${datasetPartCreateRequest.sourceName} and ${file.originalFilename} instead." } - - validateFile(file) + val isDBFile = datasetPartCreateRequest.type == DatasetPartTypeEnum.DB + validateFile(isDBFile, file) } - private fun validateFile(file: MultipartFile) { + private fun validateFile(isDBFile: Boolean, file: MultipartFile) { val originalFilename = file.originalFilename require(!originalFilename.isNullOrBlank()) { "File name must not be null or blank" } require(!originalFilename.contains("..") && !originalFilename.startsWith("/")) { "Invalid filename: '${originalFilename}'. File name should neither contains '..' nor starts by '/'." } - resourceScanner.scanMimeTypes( - originalFilename, - file.inputStream, - csmPlatformProperties.upload.authorizedMimeTypes.datasets) + if (isDBFile) { + resourceScanner.scanMimeTypes(originalFilename, file.inputStream, listOf("text/csv")) + } else { + resourceScanner.scanMimeTypes( + originalFilename, + file.inputStream, + csmPlatformProperties.upload.authorizedMimeTypes.datasets) + } } private fun validDatasetCreateRequest( @@ -1016,7 +1020,12 @@ class DatasetServiceImpl( "Multipart file names: ${files.map { it.originalFilename }}. " + "Dataset parts source names: ${datasetCreateRequest.parts?.map { it.sourceName }}." } - files.forEach { file -> validateFile(file) } + val parts = datasetCreateRequest.parts + files.forEach { file -> + val isDBFile = + parts?.find { it.sourceName == file.originalFilename }?.type == DatasetPartTypeEnum.DB + validateFile(isDBFile, file) + } } private fun validDatasetUpdateRequest( @@ -1047,7 +1056,12 @@ class DatasetServiceImpl( "Dataset parts source names: ${datasetUpdateRequest.parts?.map { it.sourceName } ?: emptyList()}." } - files.forEach { file -> validateFile(file) } + val parts = datasetUpdateRequest.parts + files.forEach { file -> + val isDBFile = + parts?.find { it.sourceName == file.originalFilename }?.type == DatasetPartTypeEnum.DB + validateFile(isDBFile, file) + } } }