Skip to content

Commit 04d25a5

Browse files
committed
Add searchDatasetParts feature and integrate tests for tag-based search
- Introduced `searchDatasetParts` API endpoint to enable dataset part search by tags.
- Updated OpenAPI schema, repository, and service implementation to support tag-based search functionality.
- Added integration and RBAC tests to validate search behavior and permission checks.
- Enhanced `DatasetServiceIntegrationTest` with test cases for `searchDatasetParts`.
- Updated dataset part model and API documentation with detailed tag descriptions.
1 parent 2a5eb5a commit 04d25a5

File tree

9 files changed

+284
-6
lines changed

9 files changed

+284
-6
lines changed

api/src/integrationTest/kotlin/com/cosmotech/api/home/ControllerTestUtils.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ class ControllerTestUtils {
434434
return DatasetPartCreateRequest(
435435
name = name,
436436
description = DATASET_PART_DESCRIPTION,
437-
tags = mutableListOf("tag_part1", "tag_part2"),
437+
tags = mutableListOf("tag_part1", "tag_part3"),
438438
type = type,
439439
sourceName = TEST_FILE_NAME)
440440
}

api/src/integrationTest/kotlin/com/cosmotech/api/home/dataset/DatasetControllerTests.kt

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ class DatasetControllerTests : ControllerTestBase() {
382382
.andExpect(jsonPath("$.updateInfo.userId").value(PLATFORM_ADMIN_EMAIL))
383383
.andExpect(jsonPath("$.updateInfo.timestamp").isNumber)
384384
.andExpect(jsonPath("$.updateInfo.timestamp").value(greaterThan(0.toLong())))
385-
.andExpect(jsonPath("$.tags").value(mutableListOf("tag_part1", "tag_part2")))
385+
.andExpect(jsonPath("$.tags").value(mutableListOf("tag_part1", "tag_part3")))
386386
.andExpect(jsonPath("$.type").value(DatasetPartTypeEnum.File.value))
387387
.andExpect(jsonPath("$.organizationId").value(organizationId))
388388
.andExpect(jsonPath("$.workspaceId").value(workspaceId))
@@ -442,7 +442,7 @@ class DatasetControllerTests : ControllerTestBase() {
442442
.andExpect(jsonPath("$.updateInfo.userId").value(PLATFORM_ADMIN_EMAIL))
443443
.andExpect(jsonPath("$.updateInfo.timestamp").isNumber)
444444
.andExpect(jsonPath("$.updateInfo.timestamp").value(greaterThan(0.toLong())))
445-
.andExpect(jsonPath("$.tags").value(mutableListOf("tag_part1", "tag_part2")))
445+
.andExpect(jsonPath("$.tags").value(mutableListOf("tag_part1", "tag_part3")))
446446
.andExpect(jsonPath("$.type").value(DatasetPartTypeEnum.File.value))
447447
.andExpect(jsonPath("$.organizationId").value(organizationId))
448448
.andExpect(jsonPath("$.workspaceId").value(workspaceId))
@@ -491,7 +491,7 @@ class DatasetControllerTests : ControllerTestBase() {
491491
.andExpect(jsonPath("$[1].updateInfo.userId").value(PLATFORM_ADMIN_EMAIL))
492492
.andExpect(jsonPath("$[1].updateInfo.timestamp").isNumber)
493493
.andExpect(jsonPath("$[1].updateInfo.timestamp").value(greaterThan(0.toLong())))
494-
.andExpect(jsonPath("$[1].tags").value(mutableListOf("tag_part1", "tag_part2")))
494+
.andExpect(jsonPath("$[1].tags").value(mutableListOf("tag_part1", "tag_part3")))
495495
.andExpect(jsonPath("$[1].type").value(DatasetPartTypeEnum.File.value))
496496
.andExpect(jsonPath("$[1].organizationId").value(organizationId))
497497
.andExpect(jsonPath("$[1].workspaceId").value(workspaceId))
@@ -502,6 +502,44 @@ class DatasetControllerTests : ControllerTestBase() {
502502
"organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/parts/GET"))
503503
}
504504

505+
@Test
506+
@WithMockOauth2User
507+
fun search_dataset_parts() {
508+
509+
val datasetId =
510+
createDatasetAndReturnId(mvc, organizationId, workspaceId, constructDatasetCreateRequest())
511+
512+
val datasetPartId =
513+
createDatasetPartAndReturnId(
514+
mvc, organizationId, workspaceId, datasetId, constructDatasetPartCreateRequest())
515+
516+
mvc.perform(
517+
post(
518+
"/organizations/$organizationId/workspaces/$workspaceId/datasets/$datasetId/parts/search")
519+
.contentType(MediaType.APPLICATION_JSON)
520+
.content(JSONArray(listOf("tag_part3")).toString())
521+
.accept(MediaType.APPLICATION_JSON)
522+
.with(csrf()))
523+
.andExpect(status().is2xxSuccessful)
524+
.andDo(MockMvcResultHandlers.print())
525+
.andExpect(jsonPath("$[0].id").value(datasetPartId))
526+
.andExpect(jsonPath("$[0].name").value(DATASET_PART_NAME))
527+
.andExpect(jsonPath("$[0].description").value(DATASET_PART_DESCRIPTION))
528+
.andExpect(jsonPath("$[0].organizationId").value(organizationId))
529+
.andExpect(jsonPath("$[0].workspaceId").value(workspaceId))
530+
.andExpect(jsonPath("$[0].sourceName").value(TEST_FILE_NAME))
531+
.andExpect(jsonPath("$[0].tags").value(mutableListOf("tag_part1", "tag_part3")))
532+
.andExpect(jsonPath("$[0].createInfo.userId").value(PLATFORM_ADMIN_EMAIL))
533+
.andExpect(jsonPath("$[0].createInfo.timestamp").isNumber)
534+
.andExpect(jsonPath("$[0].createInfo.timestamp").value(greaterThan(0.toLong())))
535+
.andExpect(jsonPath("$[0].updateInfo.userId").value(PLATFORM_ADMIN_EMAIL))
536+
.andExpect(jsonPath("$[0].updateInfo.timestamp").isNumber)
537+
.andExpect(jsonPath("$[0].updateInfo.timestamp").value(greaterThan(0.toLong())))
538+
.andDo(
539+
document(
540+
"organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/parts/search/POST"))
541+
}
542+
505543
@Test
506544
@WithMockOauth2User
507545
fun list_dataset_users() {

dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceIntegrationTest.kt

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,6 +1195,63 @@ class DatasetServiceIntegrationTest() : CsmTestBase() {
11951195
assertEquals(createDatasetPart, retrievedDatasetPart)
11961196
}
11971197

1198+
@Test
1199+
fun `test searchDatasetParts`() {
1200+
1201+
val datasetCreateRequest = DatasetCreateRequest(name = "Dataset Test")
1202+
1203+
val createDataset =
1204+
datasetApiService.createDataset(
1205+
organizationSaved.id, workspaceSaved.id, datasetCreateRequest, arrayOf())
1206+
1207+
assertTrue(createDataset.parts.isEmpty())
1208+
1209+
val resourceTestFile = resourceLoader.getResource("classpath:/$CUSTOMER_SOURCE_FILE_NAME").file
1210+
1211+
val fileToSend = FileInputStream(resourceTestFile)
1212+
1213+
val mockMultipartFile =
1214+
MockMultipartFile(
1215+
"file",
1216+
CUSTOMER_SOURCE_FILE_NAME,
1217+
MediaType.MULTIPART_FORM_DATA_VALUE,
1218+
IOUtils.toByteArray(fileToSend))
1219+
1220+
val datasetPartName = "Customer list"
1221+
val datasetPartDescription = "List of customers"
1222+
val datasetPartTags = mutableListOf("part", "public", "customers")
1223+
1224+
val createDatasetPart =
1225+
datasetApiService.createDatasetPart(
1226+
organizationSaved.id,
1227+
workspaceSaved.id,
1228+
createDataset.id,
1229+
mockMultipartFile,
1230+
DatasetPartCreateRequest(
1231+
name = datasetPartName,
1232+
sourceName = CUSTOMER_SOURCE_FILE_NAME,
1233+
description = datasetPartDescription,
1234+
tags = datasetPartTags,
1235+
type = DatasetPartTypeEnum.File))
1236+
1237+
val foundDatasetParts =
1238+
datasetApiService.searchDatasetParts(
1239+
organizationSaved.id,
1240+
workspaceSaved.id,
1241+
createDataset.id,
1242+
listOf("customers"),
1243+
null,
1244+
null)
1245+
1246+
assertTrue(foundDatasetParts.size == 1)
1247+
assertEquals(createDatasetPart.id, foundDatasetParts[0].id)
1248+
assertEquals(datasetPartName, foundDatasetParts[0].name)
1249+
assertEquals(datasetPartDescription, foundDatasetParts[0].description)
1250+
assertEquals(datasetPartTags, foundDatasetParts[0].tags)
1251+
assertEquals(CUSTOMER_SOURCE_FILE_NAME, foundDatasetParts[0].sourceName)
1252+
assertEquals(DatasetPartTypeEnum.File, foundDatasetParts[0].type)
1253+
}
1254+
11981255
@Test
11991256
fun `test getDatasetPart with wrong id`() {
12001257

dataset/src/integrationTest/kotlin/com/cosmotech/dataset/service/DatasetServiceRBACTest.kt

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,61 @@ class DatasetServiceRBACTest : CsmTestBase() {
646646
}
647647
}
648648

649+
@TestFactory
650+
fun `test RBAC searchDatasetParts`() =
651+
mapOf(
652+
ROLE_VIEWER to false,
653+
ROLE_EDITOR to false,
654+
ROLE_USER to false,
655+
ROLE_NONE to true,
656+
ROLE_ADMIN to false,
657+
)
658+
.map { (role, shouldThrow) ->
659+
DynamicTest.dynamicTest("Test RBAC searchDatasetParts : $role") {
660+
every { getCurrentAccountIdentifier(any()) } returns CONNECTED_DEFAULT_USER
661+
662+
dataset =
663+
makeDatasetCreateRequest(
664+
datasetSecurity =
665+
DatasetSecurity(
666+
default = ROLE_NONE,
667+
accessControlList =
668+
mutableListOf(
669+
DatasetAccessControl(CONNECTED_ADMIN_USER, ROLE_ADMIN),
670+
DatasetAccessControl(
671+
id = CONNECTED_DEFAULT_USER, role = role))))
672+
datasetSaved =
673+
datasetApiService.createDataset(
674+
organizationSaved.id, workspaceSaved.id, dataset, mockMultipartFiles)
675+
676+
if (shouldThrow) {
677+
val exception =
678+
assertThrows<CsmAccessForbiddenException> {
679+
datasetApiService.searchDatasetParts(
680+
organizationSaved.id,
681+
workspaceSaved.id,
682+
datasetSaved.id,
683+
listOf(),
684+
null,
685+
null)
686+
}
687+
assertEquals(
688+
"RBAC ${datasetSaved.id} - User does not have permission $PERMISSION_READ",
689+
exception.message)
690+
} else {
691+
assertDoesNotThrow {
692+
datasetApiService.searchDatasetParts(
693+
organizationSaved.id,
694+
workspaceSaved.id,
695+
datasetSaved.id,
696+
listOf(),
697+
null,
698+
null)
699+
}
700+
}
701+
}
702+
}
703+
649704
@TestFactory
650705
fun `test RBAC updateDatasetAccessControl`() =
651706
mapOf(

dataset/src/main/kotlin/com/cosmotech/dataset/repositories/DatasetPartRepository.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ interface DatasetPartRepository : RedisDocumentRepository<DatasetPart, String> {
5252
@Sanitize @Param("organizationId") organizationId: String,
5353
@Sanitize @Param("workspaceId") workspaceId: String,
5454
@Sanitize @Param("datasetId") datasetId: String,
55-
@Param("tags") tags: Set<String>,
55+
@Param("tags") tags: List<String>,
5656
pageRequest: PageRequest
5757
): Page<DatasetPart>
5858
}

dataset/src/main/kotlin/com/cosmotech/dataset/service/DatasetServiceImpl.kt

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ import org.springframework.stereotype.Service
4242
import org.springframework.web.multipart.MultipartFile
4343

4444
@Service
45-
@Suppress("TooManyFunctions")
45+
@Suppress("TooManyFunctions", "LargeClass")
4646
class DatasetServiceImpl(
4747
private val workspaceService: WorkspaceApiServiceInterface,
4848
private val datasetRepository: DatasetRepository,
@@ -582,6 +582,37 @@ class DatasetServiceImpl(
582582
return datasetPartRepository.update(datasetPart)
583583
}
584584

585+
override fun searchDatasetParts(
586+
organizationId: String,
587+
workspaceId: String,
588+
datasetId: String,
589+
requestBody: List<String>,
590+
page: Int?,
591+
size: Int?
592+
): List<DatasetPart> {
593+
if (requestBody.isEmpty()) {
594+
return listDatasetParts(organizationId, workspaceId, datasetId, page, size)
595+
}
596+
getVerifiedDataset(organizationId, workspaceId, datasetId)
597+
598+
val defaultPageSize = csmPlatformProperties.twincache.dataset.defaultPageSize
599+
val pageable = constructPageRequest(page, size, defaultPageSize)
600+
val datasetPartList =
601+
if (pageable != null) {
602+
datasetPartRepository
603+
.findDatasetPartByTags(organizationId, workspaceId, datasetId, requestBody, pageable)
604+
.toList()
605+
} else {
606+
findAllPaginated(defaultPageSize) {
607+
datasetPartRepository
608+
.findDatasetPartByTags(organizationId, workspaceId, datasetId, requestBody, it)
609+
.toList()
610+
}
611+
}
612+
613+
return datasetPartList
614+
}
615+
585616
override fun searchDatasets(
586617
organizationId: String,
587618
workspaceId: String,

dataset/src/main/openapi/dataset.yaml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,70 @@ paths:
779779
description: Dataset part specified is not found
780780

781781

782+
/organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/parts/search:
783+
parameters:
784+
- name: organization_id
785+
in: path
786+
description: the Organization identifier
787+
required: true
788+
schema:
789+
type: string
790+
- name: workspace_id
791+
in: path
792+
description: the Workspace identifier
793+
required: true
794+
schema:
795+
type: string
796+
- name: dataset_id
797+
in: path
798+
description: the Dataset identifier
799+
required: true
800+
schema:
801+
type: string
802+
post:
803+
parameters:
804+
- name: page
805+
in: query
806+
description: Page number to query (first page is at index 0)
807+
required: false
808+
schema:
809+
type: integer
810+
- name: size
811+
in: query
812+
description: Amount of result by page
813+
required: false
814+
schema:
815+
type: integer
816+
operationId: searchDatasetParts
817+
tags:
818+
- dataset
819+
summary: Search Dataset parts by tags
820+
requestBody:
821+
description: the Dataset parts search parameters
822+
required: true
823+
content:
824+
application/json:
825+
schema:
826+
type: array
827+
description: List of tags
828+
items:
829+
type: string
830+
application/yaml:
831+
schema:
832+
type: array
833+
description: List of tags
834+
items:
835+
type: string
836+
responses:
837+
"200":
838+
description: Dataset part list containing tags
839+
content:
840+
application/json:
841+
schema:
842+
type: array
843+
items:
844+
$ref: '#/components/schemas/DatasetPart'
845+
782846

783847
components:
784848
schemas:
@@ -911,6 +975,7 @@ components:
911975
description:
912976
type: string
913977
tags:
978+
x-field-extra-annotation: "@com.redis.om.spring.annotations.Indexed"
914979
type: array
915980
items:
916981
type: string

doc/Apis/DatasetApi.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ All URIs are relative to *http://localhost:8080*
1919
| [**listDatasets**](DatasetApi.md#listDatasets) | **GET** /organizations/{organization_id}/workspaces/{workspace_id}/datasets | Retrieve a list of defined Dataset |
2020
| [**queryData**](DatasetApi.md#queryData) | **GET** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/parts/{dataset_part_id}/query | Get data of a Dataset |
2121
| [**replaceDatasetPart**](DatasetApi.md#replaceDatasetPart) | **PUT** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/parts/{dataset_part_id} | Replace existing dataset parts of a Dataset |
22+
| [**searchDatasetParts**](DatasetApi.md#searchDatasetParts) | **POST** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/parts/search | Search Dataset parts by tags |
2223
| [**searchDatasets**](DatasetApi.md#searchDatasets) | **POST** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/search | Search Datasets by tags |
2324
| [**updateDataset**](DatasetApi.md#updateDataset) | **PATCH** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id} | Update a Dataset |
2425
| [**updateDatasetAccessControl**](DatasetApi.md#updateDatasetAccessControl) | **PATCH** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/security/access/{identity_id} | Update the specified access to User for a Dataset |
@@ -459,6 +460,36 @@ Replace existing dataset parts of a Dataset
459460
- **Content-Type**: multipart/form-data
460461
- **Accept**: application/json, application/yaml
461462

463+
<a name="searchDatasetParts"></a>
464+
# **searchDatasetParts**
465+
> List searchDatasetParts(organization\_id, workspace\_id, dataset\_id, request\_body, page, size)
466+
467+
Search Dataset parts by tags
468+
469+
### Parameters
470+
471+
|Name | Type | Description | Notes |
472+
|------------- | ------------- | ------------- | -------------|
473+
| **organization\_id** | **String**| the Organization identifier | [default to null] |
474+
| **workspace\_id** | **String**| the Workspace identifier | [default to null] |
475+
| **dataset\_id** | **String**| the Dataset identifier | [default to null] |
476+
| **request\_body** | [**List**](../Models/string.md)| the Dataset parts search parameters | |
477+
| **page** | **Integer**| Page number to query (first page is at index 0) | [optional] [default to null] |
478+
| **size** | **Integer**| Amount of result by page | [optional] [default to null] |
479+
480+
### Return type
481+
482+
[**List**](../Models/DatasetPart.md)
483+
484+
### Authorization
485+
486+
[oAuth2AuthCode](../README.md#oAuth2AuthCode)
487+
488+
### HTTP request headers
489+
490+
- **Content-Type**: application/json, application/yaml
491+
- **Accept**: application/json
492+
462493
<a name="searchDatasets"></a>
463494
# **searchDatasets**
464495
> List searchDatasets(organization\_id, workspace\_id, request\_body, page, size)

doc/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ All URIs are relative to *http://localhost:8080*
2222
*DatasetApi* | [**listDatasets**](Apis/DatasetApi.md#listdatasets) | **GET** /organizations/{organization_id}/workspaces/{workspace_id}/datasets | Retrieve a list of defined Dataset |
2323
*DatasetApi* | [**queryData**](Apis/DatasetApi.md#querydata) | **GET** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/parts/{dataset_part_id}/query | Get data of a Dataset |
2424
*DatasetApi* | [**replaceDatasetPart**](Apis/DatasetApi.md#replacedatasetpart) | **PUT** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/parts/{dataset_part_id} | Replace existing dataset parts of a Dataset |
25+
*DatasetApi* | [**searchDatasetParts**](Apis/DatasetApi.md#searchdatasetparts) | **POST** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/parts/search | Search Dataset parts by tags |
2526
*DatasetApi* | [**searchDatasets**](Apis/DatasetApi.md#searchdatasets) | **POST** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/search | Search Datasets by tags |
2627
*DatasetApi* | [**updateDataset**](Apis/DatasetApi.md#updatedataset) | **PATCH** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id} | Update a Dataset |
2728
*DatasetApi* | [**updateDatasetAccessControl**](Apis/DatasetApi.md#updatedatasetaccesscontrol) | **PATCH** /organizations/{organization_id}/workspaces/{workspace_id}/datasets/{dataset_id}/security/access/{identity_id} | Update the specified access to User for a Dataset |

0 commit comments

Comments
 (0)