diff --git a/dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt b/dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt index 48ba4e501..56d044f86 100644 --- a/dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt +++ b/dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt @@ -89,6 +89,7 @@ class DatasetServiceIntegrationTest() : CsmTestBase() { val CONNECTED_DEFAULT_USER = "test.user@cosmotech.com" val EMPTY_SOURCE_FILE_NAME = "emptyfile.csv" val CUSTOMER_SOURCE_FILE_NAME = "customers.csv" + val CUSTOMER_ZIPPED_SOURCE_FILE_NAME = "customers.zip" val CUSTOMER_50K_SOURCE_FILE_NAME = "customers_50K.csv" val CUSTOMERS_WITH_QUOTES_SOURCE_FILE_NAME = "customerswithquotes.csv" val CUSTOMERS_WITH_DOUBLE_QUOTES_SOURCE_FILE_NAME = "customerswithdoublequotes.csv" @@ -1567,6 +1568,48 @@ class DatasetServiceIntegrationTest() : CsmTestBase() { exception.message) } + @Test + fun `test createDatasetPart DB with mimetype unsupported`() { + + val datasetCreateRequest = DatasetCreateRequest(name = "Dataset Test") + + val createDataset = + datasetApiService.createDataset( + organizationSaved.id, workspaceSaved.id, datasetCreateRequest, arrayOf()) + + assertTrue(createDataset.parts.isEmpty()) + + val resourceTestFile = + resourceLoader.getResource("classpath:/$CUSTOMER_ZIPPED_SOURCE_FILE_NAME").file + + val fileToSend = FileInputStream(resourceTestFile) + + val mockMultipartFile = + MockMultipartFile( + "file", + CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + MediaType.MULTIPART_FORM_DATA_VALUE, + IOUtils.toByteArray(fileToSend)) + + val exception = + assertThrows { + datasetApiService.createDatasetPart( + organizationSaved.id, + workspaceSaved.id, + createDataset.id, + mockMultipartFile, + DatasetPartCreateRequest( + name = "Customer list", + sourceName = CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + description = "List of 
customers", + tags = mutableListOf("part", "public", "customers"), + type = DatasetPartTypeEnum.DB)) + } + assertEquals( + "MIME type application/zip for file $CUSTOMER_ZIPPED_SOURCE_FILE_NAME is not authorized.", + exception.message) + } + @Test fun `test createDatasetPart with unallowed file name`() { @@ -2386,6 +2429,116 @@ class DatasetServiceIntegrationTest() : CsmTestBase() { constructFilePathForDatasetPart(updatedDataset, 0))) } + @Test + fun `test updateDataset with DB dataset part and mimetype unsupported`() { + + // Create a Dataset with dataset Part + val datasetPartName = "Customers list" + val datasetPartDescription = "List of customers" + val datasetPartTags = mutableListOf("part", "public", "customers") + val datasetPartAdditionalData = + mutableMapOf("part" to "data", "complex" to mutableMapOf("nested" to "data")) + val datasetPartCreateRequest = + DatasetPartCreateRequest( + name = datasetPartName, + sourceName = CUSTOMER_SOURCE_FILE_NAME, + description = datasetPartDescription, + tags = datasetPartTags, + additionalData = datasetPartAdditionalData, + type = DatasetPartTypeEnum.File) + + val datasetName = "Customer Dataset" + val datasetDescription = "Dataset for customers" + val datasetTags = mutableListOf("dataset", "public", "customers") + val datasetAdditionalData = + mutableMapOf("dataset" to "data", "complex" to mutableMapOf("nested" to "data")) + val datasetCreateRequest = + DatasetCreateRequest( + name = datasetName, + description = datasetDescription, + tags = datasetTags, + additionalData = datasetAdditionalData, + parts = mutableListOf(datasetPartCreateRequest)) + + val resourceTestFile = resourceLoader.getResource("classpath:/$CUSTOMER_SOURCE_FILE_NAME").file + + val fileToSend = FileInputStream(resourceTestFile) + + val mockMultipartFile = + MockMultipartFile( + "files", + CUSTOMER_SOURCE_FILE_NAME, + MediaType.MULTIPART_FORM_DATA_VALUE, + IOUtils.toByteArray(fileToSend)) + + val createdDataset = + datasetApiService.createDataset( + 
organizationSaved.id, + workspaceSaved.id, + datasetCreateRequest, + arrayOf(mockMultipartFile)) + + // Create a DatasetUpdateRequest with new dataset part + val newDatasetPartName = "Product list" + val newDatasetPartDescription = "List of Product" + val newDatasetPartTags = mutableListOf("part", "public", "product") + val newDatasetPartAdditionalData = mutableMapOf("part" to "new data") + val newDatasetPartCreateRequest = + DatasetPartCreateRequest( + name = newDatasetPartName, + sourceName = CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + description = newDatasetPartDescription, + tags = newDatasetPartTags, + additionalData = newDatasetPartAdditionalData, + type = DatasetPartTypeEnum.DB) + + val newDatasetName = "Shop Dataset" + val newDatasetDescription = "Dataset for shop" + val newDatasetTags = mutableListOf("dataset", "public", "shop") + val newDatasetAdditionalData = mutableMapOf("dataset" to "new data") + val newDatasetSecurity = + DatasetSecurity( + default = ROLE_NONE, + accessControlList = + mutableListOf( + DatasetAccessControl(CONNECTED_ADMIN_USER, ROLE_ADMIN), + DatasetAccessControl(CONNECTED_DEFAULT_USER, ROLE_EDITOR))) + val datasetUpdateRequest = + DatasetUpdateRequest( + name = newDatasetName, + description = newDatasetDescription, + tags = newDatasetTags, + additionalData = newDatasetAdditionalData, + parts = mutableListOf(newDatasetPartCreateRequest), + security = newDatasetSecurity) + + val newDatasetPartTestFile = + resourceLoader.getResource("classpath:/$CUSTOMER_ZIPPED_SOURCE_FILE_NAME").file + + val newDatasetPartFileToSend = FileInputStream(newDatasetPartTestFile) + + val newDatasetPartMockMultipartFile = + MockMultipartFile( + "files", + CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + MediaType.MULTIPART_FORM_DATA_VALUE, + IOUtils.toByteArray(newDatasetPartFileToSend)) + + val exception = + assertThrows { + datasetApiService.updateDataset( + organizationSaved.id, + workspaceSaved.id, + createdDataset.id, + datasetUpdateRequest, + 
arrayOf(newDatasetPartMockMultipartFile)) + } + + assertEquals( + "MIME type application/zip for file $CUSTOMER_ZIPPED_SOURCE_FILE_NAME is not authorized.", + exception.message) + } + @Test fun `test updateDataset with empty body`() { @@ -3033,6 +3186,77 @@ class DatasetServiceIntegrationTest() : CsmTestBase() { exception.message) } + @Test + fun `test replaceDatasetPart with DB dataset part with mimetype unsupported`() { + + // Create a Dataset with dataset Part + val datasetPartCreateRequest = + DatasetPartCreateRequest( + name = "Customers list", + sourceName = CUSTOMER_SOURCE_FILE_NAME, + description = "List of customers", + tags = mutableListOf("part", "public", "customers"), + type = DatasetPartTypeEnum.DB) + + val datasetCreateRequest = + DatasetCreateRequest( + name = "Customer Dataset", + description = "Dataset for customers", + tags = mutableListOf("dataset", "public", "customers"), + parts = mutableListOf(datasetPartCreateRequest)) + + val resourceTestFile = resourceLoader.getResource("classpath:/$CUSTOMER_SOURCE_FILE_NAME").file + + val fileToSend = FileInputStream(resourceTestFile) + + val mockMultipartFile = + MockMultipartFile( + "files", + CUSTOMER_SOURCE_FILE_NAME, + MediaType.MULTIPART_FORM_DATA_VALUE, + IOUtils.toByteArray(fileToSend)) + + val createdDataset = + datasetApiService.createDataset( + organizationSaved.id, + workspaceSaved.id, + datasetCreateRequest, + arrayOf(mockMultipartFile)) + + // Create a DatasetPartUpdateRequest pointing at the zipped source file + val datasetPartUpdateRequest = + DatasetPartUpdateRequest( + sourceName = CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + description = "Dataset for shop", + tags = mutableListOf("dataset", "public", "shop")) + + val wrongTypeTestFile = + resourceLoader.getResource("classpath:/$CUSTOMER_ZIPPED_SOURCE_FILE_NAME").file + + val wrongTypeFileToSend = FileInputStream(wrongTypeTestFile) + + val wrongTypeMockMultipartFile = + MockMultipartFile( + "files", + CUSTOMER_ZIPPED_SOURCE_FILE_NAME, + 
MediaType.MULTIPART_FORM_DATA_VALUE, + IOUtils.toByteArray(wrongTypeFileToSend)) + + val exception = + assertThrows { + datasetApiService.replaceDatasetPart( + organizationSaved.id, + workspaceSaved.id, + createdDataset.id, + createdDataset.parts[0].id, + wrongTypeMockMultipartFile, + datasetPartUpdateRequest) + } + assertEquals( + "MIME type application/zip for file $CUSTOMER_ZIPPED_SOURCE_FILE_NAME is not authorized.", + exception.message) + } + @Test fun `test replaceDatasetPart with File dataset part with unallowed file name`() { diff --git a/dataset/src/integrationTest/resources/customers.zip b/dataset/src/integrationTest/resources/customers.zip new file mode 100644 index 000000000..68aab99fb Binary files /dev/null and b/dataset/src/integrationTest/resources/customers.zip differ diff --git a/dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt b/dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt index 785d3dac2..e4e0f3df0 100644 --- a/dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt +++ b/dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt @@ -847,7 +847,6 @@ class DatasetServiceImpl( datasetPartUpdateRequest: DatasetPartUpdateRequest? ): DatasetPart { val dataset = getVerifiedDataset(organizationId, workspaceId, datasetId, PERMISSION_WRITE) - validateFile(file) val datasetPart = datasetPartRepository .findBy(organizationId, workspaceId, datasetId, datasetPartId) @@ -857,6 +856,7 @@ class DatasetServiceImpl( "workspace $workspaceId and dataset $datasetId") } + validateFile(datasetPart.type == DatasetPartTypeEnum.DB, file) val now = Instant.now().toEpochMilli() val userId = getCurrentAccountIdentifier(csmPlatformProperties) val editInfo = DatasetEditInfo(timestamp = now, userId = userId) @@ -977,21 +977,25 @@ class DatasetServiceImpl( "You must upload a file with the same name as the Dataset Part sourceName. 
" + "You provided ${datasetPartCreateRequest.sourceName} and ${file.originalFilename} instead." } - - validateFile(file) + val isDBFile = datasetPartCreateRequest.type == DatasetPartTypeEnum.DB + validateFile(isDBFile, file) } - private fun validateFile(file: MultipartFile) { + private fun validateFile(isDBFile: Boolean, file: MultipartFile) { val originalFilename = file.originalFilename require(!originalFilename.isNullOrBlank()) { "File name must not be null or blank" } require(!originalFilename.contains("..") && !originalFilename.startsWith("/")) { "Invalid filename: '${originalFilename}'. File name should neither contains '..' nor starts by '/'." } - resourceScanner.scanMimeTypes( - originalFilename, - file.inputStream, - csmPlatformProperties.upload.authorizedMimeTypes.datasets) + if (isDBFile) { + resourceScanner.scanMimeTypes(originalFilename, file.inputStream, listOf("text/csv")) + } else { + resourceScanner.scanMimeTypes( + originalFilename, + file.inputStream, + csmPlatformProperties.upload.authorizedMimeTypes.datasets) + } } private fun validDatasetCreateRequest( @@ -1016,7 +1020,12 @@ class DatasetServiceImpl( "Multipart file names: ${files.map { it.originalFilename }}. " + "Dataset parts source names: ${datasetCreateRequest.parts?.map { it.sourceName }}." } - files.forEach { file -> validateFile(file) } + val parts = datasetCreateRequest.parts + files.forEach { file -> + val isDBFile = + parts?.find { it.sourceName == file.originalFilename }?.type == DatasetPartTypeEnum.DB + validateFile(isDBFile, file) + } } private fun validDatasetUpdateRequest( @@ -1047,7 +1056,12 @@ class DatasetServiceImpl( "Dataset parts source names: ${datasetUpdateRequest.parts?.map { it.sourceName } ?: emptyList()}." } - files.forEach { file -> validateFile(file) } + val parts = datasetUpdateRequest.parts + files.forEach { file -> + val isDBFile = + parts?.find { it.sourceName == file.originalFilename }?.type == DatasetPartTypeEnum.DB + validateFile(isDBFile, file) + } } }