Skip to content

Commit 8be7f8b

Browse files
committed
Enhance dataset part handling and improve file operations
- Added the ability to create dataset parts directly from resource files. (internal use)
- Introduced new `createDatasetPartFromResource` method in `DatasetApiServiceInterface` for enhanced flexibility.
- Refactored S3 file operations to consolidate `uploadFile` logic and handle both `Resource` and `MultipartFile`.
- Updated `DatasetServiceImpl` to incorporate new repository methods ensuring optimized queries for dataset and dataset part retrieval.
- Revised MIME type scanning across services for consistent validation of uploaded files.
- Enhanced `RunnerApiServiceImpl` to use `DatasetApiServiceInterface` for managing dataset parts and introduced logic for inheriting dataset parts in runners.
- Enhanced `RunnerDatasets` object to normalize dataset parameters on `Runner` response (GET /runners/<runner_id>)
1 parent 817cbed commit 8be7f8b

File tree

20 files changed

+989
-176
lines changed

20 files changed

+989
-176
lines changed

api/src/integrationTest/kotlin/com/cosmotech/api/home/runner/RunnerControllerTests.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ class RunnerControllerTests : ControllerTestBase() {
171171
.andExpect(jsonPath("$.solutionName").value(solutionName))
172172
.andExpect(jsonPath("$.runTemplateName").value(runTemplateName))
173173
.andExpect(jsonPath("$.tags").value(tags))
174-
.andExpect(jsonPath("$.datasetList").value(datasetList))
174+
.andExpect(jsonPath("$.datasets.bases").value(datasetList))
175175
.andExpect(jsonPath("$.security.default").value(ROLE_NONE))
176176
.andExpect(jsonPath("$.runSizing.requests.cpu").value("cpu_requests"))
177177
.andExpect(jsonPath("$.runSizing.requests.memory").value("memory_requests"))

build.gradle.kts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ version = scmVersion.version
5858

5959
// Dependencies version
6060
val kotlinJvmTarget = 21
61-
val cosmotechApiCommonVersion = "2.1.1-SNAPSHOT"
61+
val cosmotechApiCommonVersion = "2.1.2-JREY-split_datasetList_on_runner-SNAPSHOT"
6262
val redisOmSpringVersion = "0.9.7"
6363
val kotlinCoroutinesVersion = "1.10.2"
6464
val oktaSpringBootVersion = "3.0.7"
@@ -128,6 +128,7 @@ allprojects {
128128
configurations { all { resolutionStrategy { force("com.redis.om:redis-om-spring:0.9.10") } } }
129129

130130
repositories {
131+
mavenLocal()
131132
maven {
132133
name = "GitHubPackages"
133134
url = uri("https://maven.pkg.github.com/Cosmo-Tech/cosmotech-api-common")

dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2137,7 +2137,7 @@ class DatasetServiceIntegrationTest() : CsmTestBase() {
21372137
}
21382138

21392139
private fun constructFilePathForDatasetPart(createdDataset: Dataset, partIndex: Int): String =
2140-
"${createdDataset.organizationId}/${createdDataset.workspaceId}/${createdDataset.id}/${createdDataset.parts[partIndex].id}/${createdDataset.parts[partIndex].sourceName}"
2140+
"${createdDataset.organizationId}/${createdDataset.workspaceId}/${createdDataset.id}/${createdDataset.parts[partIndex].id}"
21412141

21422142
fun makeOrganizationCreateRequest(
21432143
name: String = "Organization Name",

dataset/src/main/kotlin/com/cosmotech/dataset/DatasetApiServiceInterface.kt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import com.cosmotech.dataset.api.DatasetApiService
77
import com.cosmotech.dataset.domain.Dataset
88
import com.cosmotech.dataset.domain.DatasetPart
99
import com.cosmotech.dataset.domain.DatasetPartCreateRequest
10+
import org.springframework.core.io.Resource
1011

1112
interface DatasetApiServiceInterface : DatasetApiService {
1213

@@ -74,4 +75,25 @@ interface DatasetApiServiceInterface : DatasetApiService {
7475
datasetId: String,
7576
datasetPartCreateRequest: DatasetPartCreateRequest
7677
): DatasetPart
78+
79+
/**
80+
* Create a data part of a Dataset from a solution file
81+
*
82+
* @param organizationId the Organization identifier (required)
83+
* @param workspaceId the Workspace identifier (required)
84+
* @param datasetId the Dataset identifier (required)
85+
* @param file Data file to upload (required)
86+
* @param datasetPartCreateRequest (required)
87+
* @return Dataset part successfully created (status code 201) or Bad request - Dataset part
88+
* cannot be created (status code 400) or Insufficient permissions on organization or workspace
89+
* or dataset (status code 403) or Dataset specified is not found (status code 404)
90+
* @see DatasetApi#createDatasetPart
91+
*/
92+
fun createDatasetPartFromResource(
93+
organizationId: kotlin.String,
94+
workspaceId: kotlin.String,
95+
datasetId: kotlin.String,
96+
file: Resource,
97+
datasetPartCreateRequest: DatasetPartCreateRequest
98+
): DatasetPart
7799
}

dataset/src/main/kotlin/com/cosmotech/dataset/part/factories/DatasetPartManagementFactory.kt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ class DatasetPartManagementFactory(
4242
datasetPartManagementService.storeData(file, datasetPart, overwrite)
4343
}
4444

45+
fun storeData(datasetPart: DatasetPart, file: Resource, overwrite: Boolean = false) {
46+
val datasetPartManagementService = getDatasetPartManagementService(datasetPart.type.value)
47+
datasetPartManagementService.storeData(file, datasetPart, overwrite)
48+
}
49+
4550
fun removeData(datasetPart: DatasetPart) {
4651
val datasetPartManagementService = getDatasetPartManagementService(datasetPart.type.value)
4752
datasetPartManagementService.delete(datasetPart)

dataset/src/main/kotlin/com/cosmotech/dataset/part/services/DatasetPartManagementService.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ interface DatasetPartManagementService {
1616

1717
fun storeData(file: MultipartFile, datasetPart: DatasetPart, overwrite: Boolean)
1818

19+
fun storeData(file: Resource, datasetPart: DatasetPart, overwrite: Boolean)
20+
1921
fun getData(datasetPart: DatasetPart): Resource
2022

2123
fun delete(datasetPart: DatasetPart)

dataset/src/main/kotlin/com/cosmotech/dataset/part/services/FileDatasetPartManagementService.kt

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ package com.cosmotech.dataset.part.services
55
import com.cosmotech.api.config.CsmPlatformProperties
66
import com.cosmotech.dataset.domain.DatasetPart
77
import io.awspring.cloud.s3.S3Template
8+
import java.io.InputStream
89
import org.slf4j.LoggerFactory
9-
import org.springframework.core.io.InputStreamResource
1010
import org.springframework.core.io.Resource
1111
import org.springframework.stereotype.Service
1212
import org.springframework.web.multipart.MultipartFile
@@ -30,19 +30,17 @@ class FileDatasetPartManagementService(
3030
val workspaceId = datasetPart.workspaceId
3131
val datasetId = datasetPart.datasetId
3232
val datasetPartId = datasetPart.id
33-
val fileName = datasetPart.sourceName
34-
val filePath =
35-
constructFilePath(organizationId, workspaceId, datasetId, datasetPartId, fileName)
36-
val fileAlreadyExists = s3Template.objectExists(csmPlatformProperties.s3.bucketName, filePath)
37-
38-
check(overwrite || !fileAlreadyExists) { "File $filePath already exists" }
33+
val filePath = constructFilePath(organizationId, workspaceId, datasetId, datasetPartId)
34+
uploadFile(filePath, overwrite, datasetPartId, file.size, file.inputStream)
35+
}
3936

40-
if (fileAlreadyExists) {
41-
logger.debug("Deleting existing file $filePath before overwriting it")
42-
s3Template.deleteObject(csmPlatformProperties.s3.bucketName, filePath)
43-
}
44-
logger.debug("Saving file ${file.originalFilename} of size ${file.size} to $filePath")
45-
s3Template.upload(csmPlatformProperties.s3.bucketName, filePath, file.inputStream)
37+
override fun storeData(file: Resource, datasetPart: DatasetPart, overwrite: Boolean) {
38+
val organizationId = datasetPart.organizationId
39+
val workspaceId = datasetPart.workspaceId
40+
val datasetId = datasetPart.datasetId
41+
val datasetPartId = datasetPart.id
42+
val filePath = constructFilePath(organizationId, workspaceId, datasetId, datasetPartId)
43+
uploadFile(filePath, overwrite, datasetPartId, file.contentLength(), file.inputStream)
4644
}
4745

4846
override fun getData(datasetPart: DatasetPart): Resource {
@@ -51,12 +49,10 @@ class FileDatasetPartManagementService(
5149
datasetPart.organizationId,
5250
datasetPart.workspaceId,
5351
datasetPart.datasetId,
54-
datasetPart.id,
55-
datasetPart.sourceName)
52+
datasetPart.id)
5653
logger.debug(
5754
"Downloading file resource for dataset part #{} from path {}", datasetPart.id, filePath)
58-
return InputStreamResource(
59-
s3Template.download(csmPlatformProperties.s3.bucketName, filePath).inputStream)
55+
return s3Template.download(csmPlatformProperties.s3.bucketName, filePath)
6056
}
6157

6258
override fun delete(datasetPart: DatasetPart) {
@@ -65,8 +61,7 @@ class FileDatasetPartManagementService(
6561
datasetPart.organizationId,
6662
datasetPart.workspaceId,
6763
datasetPart.datasetId,
68-
datasetPart.id,
69-
datasetPart.sourceName)
64+
datasetPart.id)
7065
logger.debug("Deleting file resource from workspace #{} from path {}", datasetPart.id, filePath)
7166

7267
s3Template.deleteObject(csmPlatformProperties.s3.bucketName, filePath)
@@ -76,7 +71,25 @@ class FileDatasetPartManagementService(
7671
organizationId: String,
7772
workspaceId: String,
7873
datasetId: String,
79-
datasetPartId: String,
80-
fileName: String
81-
) = "$organizationId/$workspaceId/$datasetId/$datasetPartId/$fileName"
74+
datasetPartId: String
75+
) = "$organizationId/$workspaceId/$datasetId/$datasetPartId"
76+
77+
private fun uploadFile(
78+
filePath: String,
79+
overwrite: Boolean,
80+
fileName: String,
81+
fileSize: Long,
82+
file: InputStream
83+
) {
84+
val fileAlreadyExists = s3Template.objectExists(csmPlatformProperties.s3.bucketName, filePath)
85+
86+
check(overwrite || !fileAlreadyExists) { "File $filePath already exists" }
87+
88+
if (fileAlreadyExists) {
89+
logger.debug("Deleting existing file $filePath before overwriting it")
90+
s3Template.deleteObject(csmPlatformProperties.s3.bucketName, filePath)
91+
}
92+
logger.debug("Saving file $fileName of size $fileSize to $filePath")
93+
s3Template.upload(csmPlatformProperties.s3.bucketName, filePath, file)
94+
}
8295
}

dataset/src/main/kotlin/com/cosmotech/dataset/part/services/RelationalDatasetPartManagementService.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ class RelationalDatasetPartManagementService : DatasetPartManagementService {
2424
TODO("Not yet implemented")
2525
}
2626

27+
override fun storeData(file: Resource, datasetPart: DatasetPart, overwrite: Boolean) {
28+
TODO("Not yet implemented")
29+
}
30+
2731
override fun getData(datasetPart: DatasetPart): Resource {
2832
logger.debug("RelationalDatasetPartManagementService#getData")
2933
TODO("Not yet implemented")

dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,9 @@ class DatasetServiceImpl(
232232
.findByOrganizationIdAndWorkspaceId(organizationId, workspaceId, currentUser, it)
233233
.toList()
234234
} else {
235-
datasetRepository.findAll(it).toList()
235+
datasetRepository
236+
.findByOrganizationIdAndWorkspaceIdNoSecurity(organizationId, workspaceId, it)
237+
.toList()
236238
}
237239
}
238240
} else {
@@ -244,7 +246,9 @@ class DatasetServiceImpl(
244246
organizationId, workspaceId, currentUser, pageable)
245247
.toList()
246248
} else {
247-
datasetRepository.findAll(pageable).toList()
249+
datasetRepository
250+
.findByOrganizationIdAndWorkspaceIdNoSecurity(organizationId, workspaceId, pageable)
251+
.toList()
248252
}
249253
}
250254
result.forEach { it.security = updateSecurityVisibility(it).security }
@@ -405,6 +409,21 @@ class DatasetServiceImpl(
405409
return addDatasetPartToDataset(dataset, createdDatasetPart)
406410
}
407411

412+
override fun createDatasetPartFromResource(
413+
organizationId: String,
414+
workspaceId: String,
415+
datasetId: String,
416+
file: Resource,
417+
datasetPartCreateRequest: DatasetPartCreateRequest
418+
): DatasetPart {
419+
val dataset = getVerifiedDataset(organizationId, workspaceId, datasetId, PERMISSION_WRITE)
420+
val createdDatasetPart =
421+
constructDatasetPart(organizationId, workspaceId, datasetId, datasetPartCreateRequest)
422+
datasetPartManagementFactory.storeData(createdDatasetPart, file)
423+
datasetPartRepository.save(createdDatasetPart)
424+
return addDatasetPartToDataset(dataset, createdDatasetPart)
425+
}
426+
408427
override fun constructDatasetPart(
409428
organizationId: String,
410429
workspaceId: String,
@@ -508,7 +527,10 @@ class DatasetServiceImpl(
508527
organizationId, workspaceId, datasetId, currentUser, it)
509528
.toList()
510529
} else {
511-
datasetPartRepository.findAll(it).toList()
530+
datasetPartRepository
531+
.findByOrganizationIdAndWorkspaceIdAndDatasetIdNoSecurity(
532+
organizationId, workspaceId, datasetId, it)
533+
.toList()
512534
}
513535
}
514536
} else {
@@ -520,7 +542,10 @@ class DatasetServiceImpl(
520542
organizationId, workspaceId, datasetId, currentUser, pageable)
521543
.toList()
522544
} else {
523-
datasetPartRepository.findAll(pageable).toList()
545+
datasetPartRepository
546+
.findByOrganizationIdAndWorkspaceIdAndDatasetIdNoSecurity(
547+
organizationId, workspaceId, datasetId, pageable)
548+
.toList()
524549
}
525550
}
526551

@@ -658,7 +683,10 @@ class DatasetServiceImpl(
658683
file.originalFilename?.contains("/") == true)) {
659684
"Invalid filename: '${file.originalFilename}'. '..' and '/' are not allowed"
660685
}
661-
resourceScanner.scanMimeTypes(file, csmPlatformProperties.upload.authorizedMimeTypes.datasets)
686+
resourceScanner.scanMimeTypes(
687+
file.originalFilename!!,
688+
file.inputStream,
689+
csmPlatformProperties.upload.authorizedMimeTypes.datasets)
662690
}
663691

664692
private fun validDatasetPartCreateRequest(
@@ -675,7 +703,10 @@ class DatasetServiceImpl(
675703
file.originalFilename?.contains("/") == true)) {
676704
"Invalid filename: '${file.originalFilename}'. '..' and '/' are not allowed"
677705
}
678-
resourceScanner.scanMimeTypes(file, csmPlatformProperties.upload.authorizedMimeTypes.datasets)
706+
resourceScanner.scanMimeTypes(
707+
file.originalFilename!!,
708+
file.inputStream,
709+
csmPlatformProperties.upload.authorizedMimeTypes.datasets)
679710
}
680711

681712
private fun validDatasetCreateRequest(
@@ -706,7 +737,10 @@ class DatasetServiceImpl(
706737
file.originalFilename?.contains("/") == true)) {
707738
"Invalid filename: '${file.originalFilename}'. '..' and '/' are not allowed"
708739
}
709-
resourceScanner.scanMimeTypes(file, csmPlatformProperties.upload.authorizedMimeTypes.datasets)
740+
resourceScanner.scanMimeTypes(
741+
file.originalFilename!!,
742+
file.inputStream,
743+
csmPlatformProperties.upload.authorizedMimeTypes.datasets)
710744
}
711745
}
712746

@@ -744,7 +778,10 @@ class DatasetServiceImpl(
744778
file.originalFilename?.contains("/") == true)) {
745779
"Invalid filename: '${file.originalFilename}'. '..' and '/' are not allowed"
746780
}
747-
resourceScanner.scanMimeTypes(file, csmPlatformProperties.upload.authorizedMimeTypes.datasets)
781+
resourceScanner.scanMimeTypes(
782+
file.originalFilename!!,
783+
file.inputStream,
784+
csmPlatformProperties.upload.authorizedMimeTypes.datasets)
748785
}
749786
}
750787
}

openapi/plantuml/schemas.plantuml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,7 @@ entity RunnerCreateRequest {
420420
entity RunnerDatasets {
421421
* bases: List<String>
422422
* parameter: String
423+
parameters: List<Object>
423424
}
424425

425426
entity RunnerEditInfo {

0 commit comments

Comments
 (0)