diff --git a/backend/openapi/specs/data-management.yaml b/backend/openapi/specs/data-management.yaml index b9d26fdb..e505ee57 100644 --- a/backend/openapi/specs/data-management.yaml +++ b/backend/openapi/specs/data-management.yaml @@ -1,852 +1,890 @@ -openapi: 3.0.3 -info: - title: Data Management Service API - description: | - 数据管理服务API,提供数据集的创建、管理和文件操作功能。 - - 主要功能: - - 数据集的创建和管理 - - 多种数据集类型支持(图像、文本、音频、视频、多模态等) - - 数据集文件管理 - - 数据集标签和元数据管理 - - 数据集统计信息 - version: 1.0.0 - -servers: - - url: http://localhost:8092/api/v1/data-management - description: Development server - -tags: - - name: Dataset - description: 数据集管理 - - name: DatasetFile - description: 数据集文件管理 - - name: DatasetType - description: 数据集类型管理 - - name: Tag - description: 标签管理 - -paths: - /data-management/datasets: - get: - tags: [Dataset] - operationId: getDatasets - summary: 获取数据集列表 - description: 分页查询数据集列表,支持按类型、标签等条件筛选 - parameters: - - name: page - in: query - schema: - type: integer - default: 0 - description: 页码,从1开始 - - name: size - in: query - schema: - type: integer - default: 20 - description: 每页大小 - - name: type - in: query - schema: - type: string - description: 数据集类型过滤 - - name: tags - in: query - schema: - type: string - description: 标签过滤,多个标签用逗号分隔 - - name: keyword - in: query - schema: - type: string - description: 关键词搜索(名称、描述) - - name: status - in: query - schema: - type: string - enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] - description: 数据集状态过滤 - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/PagedDatasetResponse' - '400': - description: 请求参数错误 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - post: - tags: [Dataset] - operationId: createDataset - summary: 创建数据集 - description: 创建新的数据集 - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateDatasetRequest' - responses: - '201': - description: 创建成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetResponse' - '400': - description: 请求参数错误 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - /data-management/datasets/{datasetId}: - get: - tags: [Dataset] - operationId: getDatasetById - summary: 获取数据集详情 - description: 根据ID获取数据集详细信息 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetResponse' - '404': - description: 数据集不存在 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - put: - tags: [Dataset] - summary: 更新数据集 - operationId: updateDataset - description: 更新数据集信息 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/UpdateDatasetRequest' - responses: - '200': - description: 更新成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetResponse' - '404': - description: 数据集不存在 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - delete: - tags: [Dataset] - operationId: deleteDataset - summary: 删除数据集 - description: 删除指定的数据集 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '204': - description: 删除成功 - '404': - description: 数据集不存在 - 
content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - /data-management/datasets/{datasetId}/files: - get: - tags: [DatasetFile] - summary: 获取数据集文件列表 - operationId: getDatasetFiles - description: 分页获取数据集中的文件列表 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - - name: page - in: query - schema: - type: integer - default: 0 - description: 页码,从0开始 - - name: size - in: query - schema: - type: integer - default: 20 - description: 每页大小 - - name: fileType - in: query - schema: - type: string - description: 文件类型过滤 - - name: status - in: query - schema: - type: string - enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] - description: 文件状态过滤 - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/PagedDatasetFileResponse' - - /data-management/datasets/{datasetId}/files/{fileId}: - get: - tags: [DatasetFile] - summary: 获取文件详情 - description: 获取数据集中指定文件的详细信息 - operationId: getDatasetFileById - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - - name: fileId - in: path - required: true - schema: - type: string - description: 文件ID - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetFileResponse' - - delete: - tags: [DatasetFile] - summary: 删除文件 - operationId: deleteDatasetFile - description: 从数据集中删除指定文件 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - - name: fileId - in: path - required: true - schema: - type: string - description: 文件ID - responses: - '204': - description: 删除成功 - - /data-management/datasets/{datasetId}/files/{fileId}/download: - get: - tags: [DatasetFile] - operationId: downloadDatasetFile - summary: 下载文件 - description: 下载数据集中的指定文件 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - - name: fileId - in: path - required: true - schema: - type: string - description: 文件ID - responses: - '200': - description: 文件内容 - content: - application/octet-stream: - schema: - type: string - format: binary - - /data-management/datasets/{datasetId}/files/download: - get: - tags: [ DatasetFile ] - operationId: downloadDatasetFileAsZip - summary: 下载文件 - description: 下载数据集中全部文件 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '200': - description: 文件内容 - content: - application/octet-stream: - schema: - type: string - format: binary - - /data-management/datasets/{datasetId}/files/upload/add: - post: - tags: [ DatasetFile ] - operationId: addFilesToDataset - summary: 添加文件到数据集(仅创建数据库记录) - description: 将指定源文件路径列表添加到数据集,仅在数据库中创建记录,不执行物理文件系统操作。 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/AddFilesRequest' - responses: - '200': - description: 添加成功,返回创建的文件记录列表 - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/DatasetFileResponse' - - /data-management/datasets/{datasetId}/files/upload/pre-upload: - post: - tags: [ DatasetFile ] - operationId: preUpload - summary: 切片上传预上传 - description: 预上传接口,返回后续分片上传所需的请求ID - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - 
application/json: - schema: - $ref: '#/components/schemas/UploadFilesPreRequest' - responses: - '200': - description: 预上传成功,返回请求ID - content: - application/json: - schema: - type: string - - /data-management/datasets/{datasetId}/files/upload/chunk: - post: - tags: [ DatasetFile ] - operationId: chunkUpload - summary: 切片上传 - description: 使用预上传返回的请求ID进行分片上传 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - multipart/form-data: - schema: - $ref: '#/components/schemas/UploadFileRequest' - responses: - '200': - description: 上传成功 - - /data-management/dataset-types: - get: - operationId: getDatasetTypes - tags: [DatasetType] - summary: 获取数据集类型列表 - description: 获取所有支持的数据集类型 - responses: - '200': - description: 成功 - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/DatasetTypeResponse' - - /data-management/tags: - get: - tags: [Tag] - operationId: getTags - summary: 获取标签列表 - description: 获取所有可用的标签 - parameters: - - name: keyword - in: query - schema: - type: string - description: 标签名称关键词搜索 - responses: - '200': - description: 成功 - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/TagResponse' - - post: - tags: [Tag] - operationId: createTag - summary: 创建标签 - description: 创建新的标签 - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateTagRequest' - responses: - '201': - description: 创建成功 - content: - application/json: - schema: - $ref: '#/components/schemas/TagResponse' - - /data-management/datasets/{datasetId}/statistics: - get: - tags: [Dataset] - operationId: getDatasetStatistics - summary: 获取数据集统计信息 - description: 获取数据集的统计信息(文件数量、大小、完成度等) - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetStatisticsResponse' - -components: - schemas: - PagedDatasetResponse: - type: object - properties: - content: - type: array - items: - $ref: '#/components/schemas/DatasetResponse' - page: - type: integer - description: 当前页码 - size: - type: integer - description: 每页大小 - totalElements: - type: integer - description: 总元素数 - totalPages: - type: integer - description: 总页数 - first: - type: boolean - description: 是否为第一页 - last: - type: boolean - description: 是否为最后一页 - - DatasetResponse: - type: object - properties: - id: - type: string - description: 数据集ID - name: - type: string - description: 数据集名称 - description: - type: string - description: 数据集描述 - type: - $ref: '#/components/schemas/DatasetTypeResponse' - status: - type: string - enum: [ACTIVE, INACTIVE, PROCESSING] - description: 数据集状态 - tags: - type: array - items: - $ref: '#/components/schemas/TagResponse' - description: 标签列表 - dataSource: - type: string - description: 数据源 - targetLocation: - type: string - description: 目标位置 - fileCount: - type: integer - description: 文件数量 - totalSize: - type: integer - format: int64 - description: 总大小(字节) - completionRate: - type: number - format: float - description: 完成率(0-100) - createdAt: - type: string - format: date-time - description: 创建时间 - updatedAt: - type: string - format: date-time - description: 更新时间 - createdBy: - type: string - description: 创建者 - - CreateDatasetRequest: - type: object - required: - - name - - type - properties: - name: - type: string - description: 数据集名称 - minLength: 1 - maxLength: 100 - 
description: - type: string - description: 数据集描述 - maxLength: 500 - type: - type: string - description: 数据集类型 - tags: - type: array - items: - type: string - description: 标签列表 - dataSource: - type: string - description: 数据源 - targetLocation: - type: string - description: 目标位置 - - UpdateDatasetRequest: - type: object - properties: - name: - type: string - description: 数据集名称 - maxLength: 100 - description: - type: string - description: 数据集描述 - maxLength: 500 - tags: - type: array - items: - type: string - description: 标签列表 - status: - type: string - enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] - description: 数据集状态 - - UploadFilesPreRequest: - type: object - description: 切片上传预上传请求 - properties: - hasArchive: - type: boolean - description: 是否为压缩包上传 - default: false - totalFileNum: - type: integer - format: int32 - minimum: 1 - description: 总文件数量 - totalSize: - type: integer - format: int64 - description: 总文件大小(字节) - required: [ totalFileNum ] - - UploadFileRequest: - type: object - description: 分片上传请求 - properties: - reqId: - type: string - description: 预上传返回的请求ID - fileNo: - type: integer - format: int32 - description: 文件编号(批量中的第几个) - fileName: - type: string - description: 文件名称 - totalChunkNum: - type: integer - format: int32 - description: 文件总分片数量 - chunkNo: - type: integer - format: int32 - description: 当前分片编号(从1开始) - file: - type: string - format: binary - description: 分片二进制内容 - checkSumHex: - type: string - description: 分片校验和(十六进制) - required: [ reqId, fileNo, fileName, totalChunkNum, chunkNo, file ] - - DatasetTypeResponse: - type: object - properties: - code: - type: string - description: 类型编码 - name: - type: string - description: 类型名称 - description: - type: string - description: 类型描述 - supportedFormats: - type: array - items: - type: string - description: 支持的文件格式 - icon: - type: string - description: 图标 - - PagedDatasetFileResponse: - type: object - properties: - content: - type: array - items: - $ref: '#/components/schemas/DatasetFileResponse' - page: - type: integer - description: 当前页码 - size: - type: integer - description: 每页大小 - totalElements: - type: integer - description: 总元素数 - totalPages: - type: integer - description: 总页数 - first: - type: boolean - description: 是否为第一页 - last: - type: boolean - description: 是否为最后一页 - - DatasetFileResponse: - type: object - properties: - id: - type: string - description: 文件ID - fileName: - type: string - description: 文件名 - originalName: - type: string - description: 原始文件名 - fileType: - type: string - description: 文件类型 - fileSize: - type: integer - format: int64 - description: 文件大小(字节) - status: - type: string - enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] - description: 文件状态 - description: - type: string - description: 文件描述 - filePath: - type: string - description: 文件路径 - uploadTime: - type: string - format: date-time - description: 上传时间 - uploadedBy: - type: string - description: 上传者 - - TagResponse: - type: object - properties: - id: - type: string - description: 标签ID - name: - type: string - description: 标签名称 - color: - type: string - description: 标签颜色 - description: - type: string - description: 标签描述 - usageCount: - type: integer - description: 使用次数 - - CreateTagRequest: - type: object - required: - - name - properties: - name: - type: string - description: 标签名称 - minLength: 1 - maxLength: 50 - color: - type: string - description: 标签颜色 - pattern: '^#[0-9A-Fa-f]{6}$' - description: - type: string - description: 标签描述 - maxLength: 200 - - DatasetStatisticsResponse: - type: object - properties: - totalFiles: - type: 
integer - description: 总文件数 - completedFiles: - type: integer - description: 已完成文件数 - totalSize: - type: integer - format: int64 - description: 总大小(字节) - completionRate: - type: number - format: float - description: 完成率(0-100) - fileTypeDistribution: - type: object - additionalProperties: - type: integer - description: 文件类型分布 - statusDistribution: - type: object - additionalProperties: - type: integer - description: 状态分布 - - ErrorResponse: - type: object - properties: - error: - type: string - description: 错误代码 - message: - type: string - description: 错误消息 - timestamp: - type: string - format: date-time - description: 错误时间 - path: - type: string - description: 请求路径 - - AddFilesRequest: - type: object - description: 将源文件路径添加到数据集的请求 - properties: - sourcePaths: - type: array - items: - type: string - description: 源文件路径列表(相对或绝对路径),每个元素表示一个要添加的文件或目录路径 - softAdd: - type: boolean - description: 如果为 true,则仅在数据库中创建记录(默认 false) - default: false - required: - - sourcePaths +openapi: 3.0.3 +info: + title: Data Management Service API + description: | + 数据管理服务API,提供数据集的创建、管理和文件操作功能。 + + 主要功能: + - 数据集的创建和管理 + - 多种数据集类型支持(图像、文本、音频、视频、多模态等) + - 数据集文件管理 + - 数据集标签和元数据管理 + - 数据集统计信息 + version: 1.0.0 + +servers: + - url: http://localhost:8092/api/v1/data-management + description: Development server + +tags: + - name: Dataset + description: 数据集管理 + - name: DatasetFile + description: 数据集文件管理 + - name: DatasetType + description: 数据集类型管理 + - name: Tag + description: 标签管理 + +paths: + /data-management/datasets: + get: + tags: [Dataset] + operationId: getDatasets + summary: 获取数据集列表 + description: 分页查询数据集列表,支持按类型、标签等条件筛选 + parameters: + - name: page + in: query + schema: + type: integer + default: 0 + description: 页码,从1开始 + - name: size + in: query + schema: + type: integer + default: 20 + description: 每页大小 + - name: type + in: query + schema: + type: string + description: 数据集类型过滤 + - name: tags + in: query + schema: + type: string + description: 标签过滤,多个标签用逗号分隔 + - name: keyword + in: query + schema: + type: string + description: 关键词搜索(名称、描述) + - name: status + in: query + schema: + type: string + enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] + description: 数据集状态过滤 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/PagedDatasetResponse' + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + post: + tags: [Dataset] + operationId: createDataset + summary: 创建数据集 + description: 创建新的数据集 + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateDatasetRequest' + responses: + '201': + description: 创建成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetResponse' + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /data-management/datasets/{datasetId}: + get: + tags: [Dataset] + operationId: getDatasetById + summary: 获取数据集详情 + description: 根据ID获取数据集详细信息 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetResponse' + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + put: + tags: [Dataset] + summary: 更新数据集 + operationId: updateDataset + description: 更新数据集信息 + parameters: + - name: datasetId + in: 
path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateDatasetRequest' + responses: + '200': + description: 更新成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetResponse' + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + delete: + tags: [Dataset] + operationId: deleteDataset + summary: 删除数据集 + description: 删除指定的数据集 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '204': + description: 删除成功 + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /data-management/datasets/{datasetId}/files: + get: + tags: [DatasetFile] + summary: 获取数据集文件列表 + operationId: getDatasetFiles + description: 分页获取数据集中的文件列表 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: page + in: query + schema: + type: integer + default: 0 + description: 页码,从0开始 + - name: size + in: query + schema: + type: integer + default: 20 + description: 每页大小 + - name: fileType + in: query + schema: + type: string + description: 文件类型过滤 + - name: status + in: query + schema: + type: string + enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] + description: 文件状态过滤 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/PagedDatasetFileResponse' + + /data-management/datasets/{datasetId}/files/directories: + post: + tags: [ DatasetFile ] + operationId: createDirectory + summary: 在数据集下创建子目录 + description: 在指定数据集下的某个前缀路径中创建一个新的子目录 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateDirectoryRequest' + responses: + '200': + description: 创建成功 + + /data-management/datasets/{datasetId}/files/{fileId}: + get: + tags: [DatasetFile] + summary: 获取文件详情 + description: 获取数据集中指定文件的详细信息 + operationId: getDatasetFileById + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetFileResponse' + + delete: + tags: [DatasetFile] + summary: 删除文件 + operationId: deleteDatasetFile + description: 从数据集中删除指定文件 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + responses: + '204': + description: 删除成功 + + /data-management/datasets/{datasetId}/files/{fileId}/download: + get: + tags: [DatasetFile] + operationId: downloadDatasetFile + summary: 下载文件 + description: 下载数据集中的指定文件 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + responses: + '200': + description: 文件内容 + content: + application/octet-stream: + schema: + type: string + format: binary + + /data-management/datasets/{datasetId}/files/download: + get: + tags: [ DatasetFile ] + operationId: downloadDatasetFileAsZip + summary: 下载文件 + 
description: 下载数据集中全部文件 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '200': + description: 文件内容 + content: + application/octet-stream: + schema: + type: string + format: binary + + /data-management/datasets/{datasetId}/files/upload/add: + post: + tags: [ DatasetFile ] + operationId: addFilesToDataset + summary: 添加文件到数据集(仅创建数据库记录) + description: 将指定源文件路径列表添加到数据集,仅在数据库中创建记录,不执行物理文件系统操作。 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/AddFilesRequest' + responses: + '200': + description: 添加成功,返回创建的文件记录列表 + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/DatasetFileResponse' + + /data-management/datasets/{datasetId}/files/upload/pre-upload: + post: + tags: [ DatasetFile ] + operationId: preUpload + summary: 切片上传预上传 + description: 预上传接口,返回后续分片上传所需的请求ID + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UploadFilesPreRequest' + responses: + '200': + description: 预上传成功,返回请求ID + content: + application/json: + schema: + type: string + + /data-management/datasets/{datasetId}/files/upload/chunk: + post: + tags: [ DatasetFile ] + operationId: chunkUpload + summary: 切片上传 + description: 使用预上传返回的请求ID进行分片上传 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/UploadFileRequest' + responses: + '200': + description: 上传成功 + + /data-management/dataset-types: + get: + operationId: getDatasetTypes + tags: [DatasetType] + summary: 获取数据集类型列表 + description: 获取所有支持的数据集类型 + responses: + '200': + description: 成功 + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/DatasetTypeResponse' + + /data-management/tags: + get: + tags: [Tag] + operationId: getTags + summary: 获取标签列表 + description: 获取所有可用的标签 + parameters: + - name: keyword + in: query + schema: + type: string + description: 标签名称关键词搜索 + responses: + '200': + description: 成功 + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/TagResponse' + + post: + tags: [Tag] + operationId: createTag + summary: 创建标签 + description: 创建新的标签 + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateTagRequest' + responses: + '201': + description: 创建成功 + content: + application/json: + schema: + $ref: '#/components/schemas/TagResponse' + + /data-management/datasets/{datasetId}/statistics: + get: + tags: [Dataset] + operationId: getDatasetStatistics + summary: 获取数据集统计信息 + description: 获取数据集的统计信息(文件数量、大小、完成度等) + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetStatisticsResponse' + +components: + schemas: + PagedDatasetResponse: + type: object + properties: + content: + type: array + items: + $ref: '#/components/schemas/DatasetResponse' + page: + type: integer + description: 当前页码 + size: + type: integer + description: 每页大小 + totalElements: + type: integer + description: 总元素数 + totalPages: 
+ type: integer + description: 总页数 + first: + type: boolean + description: 是否为第一页 + last: + type: boolean + description: 是否为最后一页 + + DatasetResponse: + type: object + properties: + id: + type: string + description: 数据集ID + name: + type: string + description: 数据集名称 + description: + type: string + description: 数据集描述 + type: + $ref: '#/components/schemas/DatasetTypeResponse' + status: + type: string + enum: [ACTIVE, INACTIVE, PROCESSING] + description: 数据集状态 + tags: + type: array + items: + $ref: '#/components/schemas/TagResponse' + description: 标签列表 + dataSource: + type: string + description: 数据源 + targetLocation: + type: string + description: 目标位置 + fileCount: + type: integer + description: 文件数量 + totalSize: + type: integer + format: int64 + description: 总大小(字节) + completionRate: + type: number + format: float + description: 完成率(0-100) + createdAt: + type: string + format: date-time + description: 创建时间 + updatedAt: + type: string + format: date-time + description: 更新时间 + createdBy: + type: string + description: 创建者 + + CreateDatasetRequest: + type: object + required: + - name + - type + properties: + name: + type: string + description: 数据集名称 + minLength: 1 + maxLength: 100 + description: + type: string + description: 数据集描述 + maxLength: 500 + type: + type: string + description: 数据集类型 + tags: + type: array + items: + type: string + description: 标签列表 + dataSource: + type: string + description: 数据源 + targetLocation: + type: string + description: 目标位置 + + UpdateDatasetRequest: + type: object + properties: + name: + type: string + description: 数据集名称 + maxLength: 100 + description: + type: string + description: 数据集描述 + maxLength: 500 + tags: + type: array + items: + type: string + description: 标签列表 + status: + type: string + enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] + description: 数据集状态 + + UploadFilesPreRequest: + type: object + description: 切片上传预上传请求 + properties: + hasArchive: + type: boolean + description: 是否为压缩包上传 + default: false + totalFileNum: + type: integer + format: int32 + minimum: 1 + description: 总文件数量 + totalSize: + type: integer + format: int64 + description: 总文件大小(字节) + prefix: + type: string + description: 目标子目录前缀,例如 "images/",为空表示数据集根目录 + required: [ totalFileNum ] + + CreateDirectoryRequest: + type: object + description: 创建数据集子目录请求 + properties: + parentPrefix: + type: string + description: 父级前缀路径,例如 "images/",为空表示数据集根目录 + directoryName: + type: string + description: 新建目录名称 + required: [ directoryName ] + + UploadFileRequest: + type: object + description: 分片上传请求 + properties: + reqId: + type: string + description: 预上传返回的请求ID + fileNo: + type: integer + format: int32 + description: 文件编号(批量中的第几个) + fileName: + type: string + description: 文件名称 + totalChunkNum: + type: integer + format: int32 + description: 文件总分片数量 + chunkNo: + type: integer + format: int32 + description: 当前分片编号(从1开始) + file: + type: string + format: binary + description: 分片二进制内容 + checkSumHex: + type: string + description: 分片校验和(十六进制) + required: [ reqId, fileNo, fileName, totalChunkNum, chunkNo, file ] + + DatasetTypeResponse: + type: object + properties: + code: + type: string + description: 类型编码 + name: + type: string + description: 类型名称 + description: + type: string + description: 类型描述 + supportedFormats: + type: array + items: + type: string + description: 支持的文件格式 + icon: + type: string + description: 图标 + + PagedDatasetFileResponse: + type: object + properties: + content: + type: array + items: + $ref: '#/components/schemas/DatasetFileResponse' + page: + type: integer + description: 
当前页码 + size: + type: integer + description: 每页大小 + totalElements: + type: integer + description: 总元素数 + totalPages: + type: integer + description: 总页数 + first: + type: boolean + description: 是否为第一页 + last: + type: boolean + description: 是否为最后一页 + + DatasetFileResponse: + type: object + properties: + id: + type: string + description: 文件ID + fileName: + type: string + description: 文件名 + originalName: + type: string + description: 原始文件名 + fileType: + type: string + description: 文件类型 + fileSize: + type: integer + format: int64 + description: 文件大小(字节) + status: + type: string + enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] + description: 文件状态 + description: + type: string + description: 文件描述 + filePath: + type: string + description: 文件路径 + uploadTime: + type: string + format: date-time + description: 上传时间 + uploadedBy: + type: string + description: 上传者 + + TagResponse: + type: object + properties: + id: + type: string + description: 标签ID + name: + type: string + description: 标签名称 + color: + type: string + description: 标签颜色 + description: + type: string + description: 标签描述 + usageCount: + type: integer + description: 使用次数 + + CreateTagRequest: + type: object + required: + - name + properties: + name: + type: string + description: 标签名称 + minLength: 1 + maxLength: 50 + color: + type: string + description: 标签颜色 + pattern: '^#[0-9A-Fa-f]{6}$' + description: + type: string + description: 标签描述 + maxLength: 200 + + DatasetStatisticsResponse: + type: object + properties: + totalFiles: + type: integer + description: 总文件数 + completedFiles: + type: integer + description: 已完成文件数 + totalSize: + type: integer + format: int64 + description: 总大小(字节) + completionRate: + type: number + format: float + description: 完成率(0-100) + fileTypeDistribution: + type: object + additionalProperties: + type: integer + description: 文件类型分布 + statusDistribution: + type: object + additionalProperties: + type: integer + description: 状态分布 + + ErrorResponse: + type: object + properties: + error: + type: string + description: 错误代码 + message: + type: string + description: 错误消息 + timestamp: + type: string + format: date-time + description: 错误时间 + path: + type: string + description: 请求路径 + + AddFilesRequest: + type: object + description: 将源文件路径添加到数据集的请求 + properties: + sourcePaths: + type: array + items: + type: string + description: 源文件路径列表(相对或绝对路径),每个元素表示一个要添加的文件或目录路径 + softAdd: + type: boolean + description: 如果为 true,则仅在数据库中创建记录(默认 false) + default: false + required: + - sourcePaths diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java index 02aaf546..62cdec19 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java @@ -1,504 +1,759 @@ -package com.datamate.datamanagement.application; - -import com.baomidou.mybatisplus.core.metadata.IPage; -import com.baomidou.mybatisplus.extension.plugins.pagination.Page; -import com.datamate.common.domain.model.ChunkUploadPreRequest; -import com.datamate.common.domain.model.FileUploadResult; -import com.datamate.common.domain.service.FileService; -import com.datamate.common.domain.utils.AnalyzerUtils; -import com.datamate.common.domain.utils.ArchiveAnalyzer; 
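
> A note on the two-step upload defined in the spec above: `preUpload` returns a plain-string request ID, and every subsequent `multipart/form-data` chunk must carry that ID together with a 1-based `chunkNo`. Below is a minimal client sketch against the development server from the `servers` block. The 5 MB chunk size, the single-file `fileNo=1`, the hand-rolled multipart encoding, and reading the response body as the raw ID are illustrative assumptions, not requirements of the spec; the optional `checkSumHex` field is omitted.

```java
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;

public class ChunkUploadClientSketch {
    private static final String BASE = "http://localhost:8092/api/v1/data-management";
    private static final int CHUNK_SIZE = 5 * 1024 * 1024; // assumed; the spec does not fix a chunk size

    public static void main(String[] args) throws IOException, InterruptedException {
        HttpClient client = HttpClient.newHttpClient();
        String datasetId = "your-dataset-id"; // hypothetical ID
        Path file = Path.of("sample.bin");

        // Step 1: pre-upload returns the reqId that every chunk must carry.
        HttpRequest pre = HttpRequest.newBuilder()
                .uri(URI.create(BASE + "/data-management/datasets/" + datasetId + "/files/upload/pre-upload"))
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString("{\"totalFileNum\":1,\"hasArchive\":false}"))
                .build();
        String reqId = client.send(pre, HttpResponse.BodyHandlers.ofString()).body();

        // Step 2: send the file as 1-based chunks; fields mirror UploadFileRequest.
        // Reads the whole file into memory for brevity only.
        byte[] data = Files.readAllBytes(file);
        int totalChunkNum = (int) ((data.length + CHUNK_SIZE - 1L) / CHUNK_SIZE);
        for (int chunkNo = 1; chunkNo <= totalChunkNum; chunkNo++) {
            byte[] part = Arrays.copyOfRange(data, (chunkNo - 1) * CHUNK_SIZE,
                    (int) Math.min((long) chunkNo * CHUNK_SIZE, data.length));
            String boundary = "----datamate-" + chunkNo;
            ByteArrayOutputStream body = new ByteArrayOutputStream();
            writeField(body, boundary, "reqId", reqId);
            writeField(body, boundary, "fileNo", "1");
            writeField(body, boundary, "fileName", file.getFileName().toString());
            writeField(body, boundary, "totalChunkNum", String.valueOf(totalChunkNum));
            writeField(body, boundary, "chunkNo", String.valueOf(chunkNo));
            body.write(("--" + boundary + "\r\nContent-Disposition: form-data; name=\"file\"; filename=\""
                    + file.getFileName() + "\"\r\nContent-Type: application/octet-stream\r\n\r\n")
                    .getBytes(StandardCharsets.UTF_8));
            body.write(part);
            body.write(("\r\n--" + boundary + "--\r\n").getBytes(StandardCharsets.UTF_8));
            HttpRequest chunk = HttpRequest.newBuilder()
                    .uri(URI.create(BASE + "/data-management/datasets/" + datasetId + "/files/upload/chunk"))
                    .header("Content-Type", "multipart/form-data; boundary=" + boundary)
                    .POST(HttpRequest.BodyPublishers.ofByteArray(body.toByteArray()))
                    .build();
            client.send(chunk, HttpResponse.BodyHandlers.discarding());
        }
    }

    // Writes one plain multipart form field.
    private static void writeField(ByteArrayOutputStream out, String boundary, String name, String value)
            throws IOException {
        out.write(("--" + boundary + "\r\nContent-Disposition: form-data; name=\"" + name + "\"\r\n\r\n"
                + value + "\r\n").getBytes(StandardCharsets.UTF_8));
    }
}
```
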
-import com.datamate.common.infrastructure.exception.BusinessAssert; -import com.datamate.common.infrastructure.exception.BusinessException; -import com.datamate.common.infrastructure.exception.CommonErrorCode; -import com.datamate.common.infrastructure.exception.SystemErrorCode; -import com.datamate.common.interfaces.PagedResponse; -import com.datamate.common.interfaces.PagingQuery; -import com.datamate.datamanagement.common.enums.DuplicateMethod; -import com.datamate.datamanagement.domain.contants.DatasetConstant; -import com.datamate.datamanagement.domain.model.dataset.Dataset; -import com.datamate.datamanagement.domain.model.dataset.DatasetFile; -import com.datamate.datamanagement.domain.model.dataset.DatasetFileUploadCheckInfo; -import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode; -import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; -import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository; -import com.datamate.datamanagement.interfaces.converter.DatasetConverter; -import com.datamate.datamanagement.interfaces.dto.AddFilesRequest; -import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest; -import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; -import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import jakarta.servlet.http.HttpServletResponse; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; -import org.apache.commons.io.IOUtils; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.core.io.Resource; -import org.springframework.core.io.UrlResource; -import org.springframework.http.HttpHeaders; -import org.springframework.stereotype.Service; -import org.springframework.transaction.annotation.Transactional; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.net.MalformedURLException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.attribute.BasicFileAttributes; -import java.time.LocalDateTime; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.concurrent.CompletableFuture; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -/** - * 数据集文件应用服务 - */ -@Slf4j -@Service -@Transactional -public class DatasetFileApplicationService { - - private final DatasetFileRepository datasetFileRepository; - private final DatasetRepository datasetRepository; - private final FileService fileService; - - @Value("${datamate.data-management.base-path:/dataset}") - private String datasetBasePath; - - @Value("${datamate.data-management.file.duplicate:COVER}") - private DuplicateMethod duplicateMethod; - - @Autowired - public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository, - DatasetRepository datasetRepository, FileService fileService) { - this.datasetFileRepository = datasetFileRepository; - this.datasetRepository = datasetRepository; - this.fileService = fileService; - } - - /** - * 获取数据集文件列表 - */ - @Transactional(readOnly = true) - public PagedResponse getDatasetFiles(String 
datasetId, String fileType, String status, String name, PagingQuery pagingQuery) { - IPage page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize()); - IPage files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page); - return PagedResponse.of(files); - } - - /** - * 获取数据集文件列表 - */ - @Transactional(readOnly = true) - public PagedResponse getDatasetFilesWithDirectory(String datasetId, String prefix, PagingQuery pagingQuery) { - Dataset dataset = datasetRepository.getById(datasetId); - int page = Math.max(pagingQuery.getPage(), 1); - int size = pagingQuery.getSize() == null || pagingQuery.getSize() < 0 ? 20 : pagingQuery.getSize(); - if (dataset == null) { - return PagedResponse.of(new Page<>(page, size)); - } - String datasetPath = dataset.getPath(); - Path queryPath = Path.of(dataset.getPath() + File.separator + prefix); - Map datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId) - .stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity())); - try (Stream pathStream = Files.list(queryPath)) { - List allFiles = pathStream - .filter(path -> path.toString().startsWith(datasetPath)) - .sorted(Comparator - .comparing((Path path) -> !Files.isDirectory(path)) - .thenComparing(path -> path.getFileName().toString())) - .collect(Collectors.toList()); - - // 计算分页 - int total = allFiles.size(); - int totalPages = (int) Math.ceil((double) total / size); - - // 获取当前页数据 - int fromIndex = (page - 1) * size; - fromIndex = Math.max(fromIndex, 0); - int toIndex = Math.min(fromIndex + size, total); - - List pageData = new ArrayList<>(); - if (fromIndex < total) { - pageData = allFiles.subList(fromIndex, toIndex); - } - List datasetFiles = pageData.stream().map(path -> getDatasetFile(path, datasetFilesMap)).toList(); - - return new PagedResponse<>(page, size, total, totalPages, datasetFiles); - } catch (IOException e) { - log.error("list dataset path error", e); - return PagedResponse.of(new Page<>(page, size)); - } - } - - private DatasetFile getDatasetFile(Path path, Map datasetFilesMap) { - DatasetFile datasetFile = new DatasetFile(); - LocalDateTime localDateTime = LocalDateTime.now(); - try { - localDateTime = Files.getLastModifiedTime(path).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); - } catch (IOException e) { - log.error("get last modified time error", e); - } - datasetFile.setFileName(path.getFileName().toString()); - datasetFile.setUploadTime(localDateTime); - if (Files.isDirectory(path)) { - datasetFile.setId("directory-" + datasetFile.getFileName()); - } else if (Objects.isNull(datasetFilesMap.get(path.toString()))) { - datasetFile.setId("file-" + datasetFile.getFileName()); - datasetFile.setFileSize(path.toFile().length()); - } else { - datasetFile = datasetFilesMap.get(path.toString()); - } - return datasetFile; - } - - /** - * 获取文件详情 - */ - @Transactional(readOnly = true) - public DatasetFile getDatasetFile(String datasetId, String fileId) { - DatasetFile file = datasetFileRepository.getById(fileId); - if (file == null) { - throw new IllegalArgumentException("File not found: " + fileId); - } - if (!file.getDatasetId().equals(datasetId)) { - throw new IllegalArgumentException("File does not belong to the specified dataset"); - } - return file; - } - - /** - * 删除文件 - */ - @Transactional - public void deleteDatasetFile(String datasetId, String fileId) { - DatasetFile file = getDatasetFile(datasetId, fileId); - Dataset dataset = datasetRepository.getById(datasetId); - dataset.setFiles(new 
ArrayList<>(Collections.singleton(file))); - datasetFileRepository.removeById(fileId); - dataset.removeFile(file); - datasetRepository.updateById(dataset); - // 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录 - if (file.getFilePath().startsWith(dataset.getPath())) { - try { - Path filePath = Paths.get(file.getFilePath()); - Files.deleteIfExists(filePath); - } catch (IOException ex) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - } - - /** - * 下载文件 - */ - @Transactional(readOnly = true) - public Resource downloadFile(String datasetId, String fileId) { - DatasetFile file = getDatasetFile(datasetId, fileId); - try { - Path filePath = Paths.get(file.getFilePath()).normalize(); - Resource resource = new UrlResource(filePath.toUri()); - if (resource.exists()) { - return resource; - } else { - throw new RuntimeException("File not found: " + file.getFileName()); - } - } catch (MalformedURLException ex) { - throw new RuntimeException("File not found: " + file.getFileName(), ex); - } - } - - /** - * 下载文件 - */ - @Transactional(readOnly = true) - public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) { - Dataset dataset = datasetRepository.getById(datasetId); - if (Objects.isNull(dataset)) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - List allByDatasetId = datasetFileRepository.findAllByDatasetId(datasetId); - Set filePaths = allByDatasetId.stream().map(DatasetFile::getFilePath).collect(Collectors.toSet()); - String datasetPath = dataset.getPath(); - Path downloadPath = Path.of(datasetPath); - response.setContentType("application/zip"); - String zipName = String.format("dataset_%s.zip", - LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss"))); - response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + zipName); - try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(response.getOutputStream())) { - try (Stream pathStream = Files.walk(downloadPath)) { - List allPaths = pathStream.filter(path -> path.toString().startsWith(datasetPath)) - .filter(path -> filePaths.stream().anyMatch(filePath -> filePath.startsWith(path.toString()))) - .toList(); - for (Path path : allPaths) { - addToZipFile(path, downloadPath, zos); - } - } - } catch (IOException e) { - log.error("Failed to download files in batches.", e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - - private void addToZipFile(Path path, Path basePath, ZipArchiveOutputStream zos) throws IOException { - String entryName = basePath.relativize(path) - .toString() - .replace(File.separator, "/"); - - // 处理目录 - if (Files.isDirectory(path)) { - if (!entryName.isEmpty()) { - entryName += "/"; - ZipArchiveEntry dirEntry = new ZipArchiveEntry(entryName); - zos.putArchiveEntry(dirEntry); - zos.closeArchiveEntry(); - } - } else { - // 处理文件 - ZipArchiveEntry fileEntry = new ZipArchiveEntry(path.toFile(), entryName); - - // 设置更多属性 - BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class); - fileEntry.setSize(attrs.size()); - fileEntry.setLastModifiedTime(attrs.lastModifiedTime()); - - zos.putArchiveEntry(fileEntry); - - try (InputStream is = Files.newInputStream(path)) { - IOUtils.copy(is, zos); - } - zos.closeArchiveEntry(); - } - } - - /** - * 预上传 - * - * @param chunkUploadRequest 上传请求 - * @param datasetId 数据集id - * @return 请求id - */ - @Transactional - public String preUpload(UploadFilesPreRequest chunkUploadRequest, String datasetId) { - if 
(Objects.isNull(datasetRepository.getById(datasetId))) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - ChunkUploadPreRequest request = ChunkUploadPreRequest.builder().build(); - request.setUploadPath(datasetBasePath + File.separator + datasetId); - request.setTotalFileNum(chunkUploadRequest.getTotalFileNum()); - request.setServiceId(DatasetConstant.SERVICE_ID); - DatasetFileUploadCheckInfo checkInfo = new DatasetFileUploadCheckInfo(); - checkInfo.setDatasetId(datasetId); - checkInfo.setHasArchive(chunkUploadRequest.isHasArchive()); - try { - ObjectMapper objectMapper = new ObjectMapper(); - String checkInfoJson = objectMapper.writeValueAsString(checkInfo); - request.setCheckInfo(checkInfoJson); - } catch (JsonProcessingException e) { - log.warn("Failed to serialize checkInfo to JSON", e); - } - return fileService.preUpload(request); - } - - /** - * 切片上传 - * - * @param uploadFileRequest 上传请求 - */ - @Transactional - public void chunkUpload(String datasetId, UploadFileRequest uploadFileRequest) { - FileUploadResult uploadResult = fileService.chunkUpload(DatasetConverter.INSTANCE.toChunkUploadRequest(uploadFileRequest)); - saveFileInfoToDb(uploadResult, datasetId); - } - - private void saveFileInfoToDb(FileUploadResult fileUploadResult, String datasetId) { - if (Objects.isNull(fileUploadResult.getSavedFile())) { - // 文件切片上传没有完成 - return; - } - DatasetFileUploadCheckInfo checkInfo; - try { - ObjectMapper objectMapper = new ObjectMapper(); - checkInfo = objectMapper.readValue(fileUploadResult.getCheckInfo(), DatasetFileUploadCheckInfo.class); - if (!Objects.equals(checkInfo.getDatasetId(), datasetId)) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - } catch (IllegalArgumentException | JsonProcessingException e) { - log.warn("Failed to convert checkInfo to DatasetFileUploadCheckInfo", e); - throw BusinessException.of(CommonErrorCode.PRE_UPLOAD_REQUEST_NOT_EXIST); - } - List files; - if (checkInfo.isHasArchive() && AnalyzerUtils.isPackage(fileUploadResult.getSavedFile().getPath())) { - files = ArchiveAnalyzer.process(fileUploadResult); - } else { - files = Collections.singletonList(fileUploadResult); - } - addFileToDataset(datasetId, files); - } - - private void addFileToDataset(String datasetId, List unpacked) { - Dataset dataset = datasetRepository.getById(datasetId); - dataset.setFiles(datasetFileRepository.findAllByDatasetId(datasetId)); - for (FileUploadResult file : unpacked) { - File savedFile = file.getSavedFile(); - LocalDateTime currentTime = LocalDateTime.now(); - DatasetFile datasetFile = DatasetFile.builder() - .id(UUID.randomUUID().toString()) - .datasetId(datasetId) - .fileSize(savedFile.length()) - .uploadTime(currentTime) - .lastAccessTime(currentTime) - .fileName(file.getFileName()) - .filePath(savedFile.getPath()) - .fileType(AnalyzerUtils.getExtension(file.getFileName())) - .build(); - setDatasetFileId(datasetFile, dataset); - datasetFileRepository.saveOrUpdate(datasetFile); - dataset.addFile(datasetFile); - } - dataset.active(); - datasetRepository.updateById(dataset); - } - - /** - * 为数据集文件设置文件id - * - * @param datasetFile 要设置id的文件 - * @param dataset 数据集(包含文件列表) - */ - private void setDatasetFileId(DatasetFile datasetFile, Dataset dataset) { - Map existDatasetFilMap = dataset.getFiles().stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity())); - DatasetFile existDatasetFile = existDatasetFilMap.get(datasetFile.getFilePath()); - if (Objects.isNull(existDatasetFile)) { - return; - } - if 
(duplicateMethod == DuplicateMethod.ERROR) { - log.error("file {} already exists in dataset {}", datasetFile.getFileName(), datasetFile.getDatasetId()); - throw BusinessException.of(DataManagementErrorCode.DATASET_FILE_ALREADY_EXISTS); - } - if (duplicateMethod == DuplicateMethod.COVER) { - dataset.removeFile(existDatasetFile); - datasetFile.setId(existDatasetFile.getId()); - } - } - - /** - * 复制文件到数据集目录 - * - * @param datasetId 数据集id - * @param req 复制文件请求 - * @return 复制的文件列表 - */ - @Transactional - public List copyFilesToDatasetDir(String datasetId, CopyFilesRequest req) { - Dataset dataset = datasetRepository.getById(datasetId); - BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND); - List copiedFiles = new ArrayList<>(); - List existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId); - dataset.setFiles(existDatasetFiles); - for (String sourceFilePath : req.sourcePaths()) { - Path sourcePath = Paths.get(sourceFilePath); - if (!Files.exists(sourcePath) || !Files.isRegularFile(sourcePath)) { - log.warn("Source file does not exist or is not a regular file: {}", sourceFilePath); - continue; - } - String fileName = sourcePath.getFileName().toString(); - File sourceFile = sourcePath.toFile(); - LocalDateTime currentTime = LocalDateTime.now(); - DatasetFile datasetFile = DatasetFile.builder() - .id(UUID.randomUUID().toString()) - .datasetId(datasetId) - .fileName(fileName) - .fileType(AnalyzerUtils.getExtension(fileName)) - .fileSize(sourceFile.length()) - .filePath(Paths.get(dataset.getPath(), fileName).toString()) - .uploadTime(currentTime) - .lastAccessTime(currentTime) - .build(); - setDatasetFileId(datasetFile, dataset); - dataset.addFile(datasetFile); - copiedFiles.add(datasetFile); - } - datasetFileRepository.saveOrUpdateBatch(copiedFiles, 100); - dataset.active(); - datasetRepository.updateById(dataset); - CompletableFuture.runAsync(() -> copyFilesToDatasetDir(req.sourcePaths(), dataset)); - return copiedFiles; - } - - private void copyFilesToDatasetDir(List sourcePaths, Dataset dataset) { - for (String sourcePath : sourcePaths) { - Path sourceFilePath = Paths.get(sourcePath); - Path targetFilePath = Paths.get(dataset.getPath(), sourceFilePath.getFileName().toString()); - try { - Files.createDirectories(Path.of(dataset.getPath())); - Files.copy(sourceFilePath, targetFilePath); - } catch (IOException e) { - log.error("Failed to copy file from {} to {}", sourcePath, targetFilePath, e); - } - } - } - - /** - * 添加文件到数据集(仅创建数据库记录,不执行文件系统操作) - * - * @param datasetId 数据集id - * @param req 添加文件请求 - * @return 添加的文件列表 - */ - @Transactional - public List addFilesToDataset(String datasetId, AddFilesRequest req) { - Dataset dataset = datasetRepository.getById(datasetId); - BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND); - List addedFiles = new ArrayList<>(); - List existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId); - dataset.setFiles(existDatasetFiles); - - boolean softAdd = req.softAdd(); - String metadata; - try { - Map metadataMap = Map.of("softAdd", softAdd); - ObjectMapper objectMapper = new ObjectMapper(); - metadata = objectMapper.writeValueAsString(metadataMap); - } catch (JsonProcessingException e) { - log.error("Failed to serialize metadataMap", e); - throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR); - } - - for (String sourceFilePath : req.sourcePaths()) { - Path sourcePath = Paths.get(sourceFilePath); - String fileName = sourcePath.getFileName().toString(); - File sourceFile = sourcePath.toFile(); - 
LocalDateTime currentTime = LocalDateTime.now(); - - DatasetFile datasetFile = DatasetFile.builder() - .id(UUID.randomUUID().toString()) - .datasetId(datasetId) - .fileName(fileName) - .fileType(AnalyzerUtils.getExtension(fileName)) - .fileSize(sourceFile.length()) - .filePath(sourceFilePath) - .uploadTime(currentTime) - .lastAccessTime(currentTime) - .metadata(metadata) - .build(); - setDatasetFileId(datasetFile, dataset); - dataset.addFile(datasetFile); - addedFiles.add(datasetFile); - } - datasetFileRepository.saveOrUpdateBatch(addedFiles, 100); - dataset.active(); - datasetRepository.updateById(dataset); - // Note: addFilesToDataset only creates DB records, no file system operations - // If file copy is needed, use copyFilesToDatasetDir endpoint instead - return addedFiles; - } -} +package com.datamate.datamanagement.application; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.datamate.common.domain.model.ChunkUploadPreRequest; +import com.datamate.common.domain.model.FileUploadResult; +import com.datamate.common.domain.service.FileService; +import com.datamate.common.domain.utils.AnalyzerUtils; +import com.datamate.common.domain.utils.ArchiveAnalyzer; +import com.datamate.common.infrastructure.exception.BusinessAssert; +import com.datamate.common.infrastructure.exception.BusinessException; +import com.datamate.common.infrastructure.exception.CommonErrorCode; +import com.datamate.common.infrastructure.exception.SystemErrorCode; +import com.datamate.common.interfaces.PagedResponse; +import com.datamate.common.interfaces.PagingQuery; +import com.datamate.datamanagement.common.enums.DuplicateMethod; +import com.datamate.datamanagement.domain.contants.DatasetConstant; +import com.datamate.datamanagement.domain.model.dataset.Dataset; +import com.datamate.datamanagement.domain.model.dataset.DatasetFile; +import com.datamate.datamanagement.domain.model.dataset.DatasetFileUploadCheckInfo; +import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode; +import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; +import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository; +import com.datamate.datamanagement.interfaces.converter.DatasetConverter; +import com.datamate.datamanagement.interfaces.dto.AddFilesRequest; +import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest; +import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest; +import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; +import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import jakarta.servlet.http.HttpServletResponse; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; +import org.apache.commons.io.IOUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.io.Resource; +import org.springframework.core.io.UrlResource; +import org.springframework.http.HttpHeaders; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; 
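
> The directory-aware listing introduced below (`getDatasetFile(Path, ...)`) walks each directory twice: once to count regular files and once to sum their sizes. Both figures can be collected in a single `Files.walk` pass; a self-contained sketch under that observation follows — the class and method names are hypothetical, not part of this diff.

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Stream;

// Hypothetical helper: gathers what setFileCount/setFileSize need in one traversal.
final class DirectoryStats {
    final long fileCount;
    final long totalSize;

    private DirectoryStats(long fileCount, long totalSize) {
        this.fileCount = fileCount;
        this.totalSize = totalSize;
    }

    static DirectoryStats of(Path dir) throws IOException {
        long[] acc = new long[2]; // [0] = regular-file count, [1] = total size in bytes
        try (Stream<Path> walk = Files.walk(dir)) {
            walk.filter(Files::isRegularFile).forEach(p -> {
                acc[0]++;
                try {
                    acc[1] += Files.size(p);
                } catch (IOException e) {
                    // mirror the diff's behavior: an unreadable file contributes 0
                    // instead of failing the whole walk
                }
            });
        }
        return new DirectoryStats(acc[0], acc[1]);
    }
}
```

> The `long[]` accumulator keeps the lambda capture effectively final; skipping unreadable files matches the error handling in the two-pass version below.
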
+import java.net.MalformedURLException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * 数据集文件应用服务 + */ +@Slf4j +@Service +@Transactional +public class DatasetFileApplicationService { + + private final DatasetFileRepository datasetFileRepository; + private final DatasetRepository datasetRepository; + private final FileService fileService; + + @Value("${datamate.data-management.base-path:/dataset}") + private String datasetBasePath; + + @Value("${datamate.data-management.file.duplicate:COVER}") + private DuplicateMethod duplicateMethod; + + @Autowired + public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository, + DatasetRepository datasetRepository, FileService fileService) { + this.datasetFileRepository = datasetFileRepository; + this.datasetRepository = datasetRepository; + this.fileService = fileService; + } + + /** + * 获取数据集文件列表 + */ + @Transactional(readOnly = true) + public PagedResponse getDatasetFiles(String datasetId, String fileType, String status, String name, PagingQuery pagingQuery) { + IPage page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize()); + IPage files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page); + return PagedResponse.of(files); + } + + /** + * 获取数据集文件列表 + */ + @Transactional(readOnly = true) + public PagedResponse getDatasetFilesWithDirectory(String datasetId, String prefix, PagingQuery pagingQuery) { + Dataset dataset = datasetRepository.getById(datasetId); + int page = Math.max(pagingQuery.getPage(), 1); + int size = pagingQuery.getSize() == null || pagingQuery.getSize() < 0 ? 
20 : pagingQuery.getSize();
+        if (dataset == null) {
+            return PagedResponse.of(new Page<>(page, size));
+        }
+        String datasetPath = dataset.getPath();
+        Path queryPath = Path.of(dataset.getPath() + File.separator + prefix);
+        Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId)
+                .stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
+        try (Stream<Path> pathStream = Files.list(queryPath)) {
+            List<Path> allFiles = pathStream
+                    .filter(path -> path.toString().startsWith(datasetPath))
+                    .sorted(Comparator
+                            .comparing((Path path) -> !Files.isDirectory(path))
+                            .thenComparing(path -> path.getFileName().toString()))
+                    .collect(Collectors.toList());
+
+            // compute pagination
+            int total = allFiles.size();
+            int totalPages = (int) Math.ceil((double) total / size);
+
+            // slice out the current page
+            int fromIndex = (page - 1) * size;
+            fromIndex = Math.max(fromIndex, 0);
+            int toIndex = Math.min(fromIndex + size, total);
+
+            List<Path> pageData = new ArrayList<>();
+            if (fromIndex < total) {
+                pageData = allFiles.subList(fromIndex, toIndex);
+            }
+            List<DatasetFile> datasetFiles = pageData.stream().map(path -> getDatasetFile(path, datasetFilesMap)).toList();
+
+            return new PagedResponse<>(page, size, total, totalPages, datasetFiles);
+        } catch (IOException e) {
+            log.error("list dataset path error", e);
+            return PagedResponse.of(new Page<>(page, size));
+        }
+    }
+
+    private DatasetFile getDatasetFile(Path path, Map<String, DatasetFile> datasetFilesMap) {
+        DatasetFile datasetFile = new DatasetFile();
+        LocalDateTime localDateTime = LocalDateTime.now();
+        try {
+            localDateTime = Files.getLastModifiedTime(path).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
+        } catch (IOException e) {
+            log.error("get last modified time error", e);
+        }
+        datasetFile.setFileName(path.getFileName().toString());
+        datasetFile.setUploadTime(localDateTime);
+
+        // directories and regular files are handled differently
+        if (Files.isDirectory(path)) {
+            datasetFile.setId("directory-" + datasetFile.getFileName());
+            datasetFile.setDirectory(true);
+
+            // count the files under the directory and sum their sizes
+            try {
+                long fileCount;
+                long totalSize;
+
+                try (Stream<Path> walk = Files.walk(path)) {
+                    fileCount = walk.filter(Files::isRegularFile).count();
+                }
+
+                try (Stream<Path> walk = Files.walk(path)) {
+                    totalSize = walk
+                            .filter(Files::isRegularFile)
+                            .mapToLong(p -> {
+                                try {
+                                    return Files.size(p);
+                                } catch (IOException e) {
+                                    log.error("get file size error", e);
+                                    return 0L;
+                                }
+                            })
+                            .sum();
+                }
+
+                datasetFile.setFileCount(fileCount);
+                datasetFile.setFileSize(totalSize);
+            } catch (IOException e) {
+                log.error("stat directory info error", e);
+            }
+        } else {
+            DatasetFile exist = datasetFilesMap.get(path.toString());
+            if (exist == null) {
+                datasetFile.setId("file-" + datasetFile.getFileName());
+                datasetFile.setFileSize(path.toFile().length());
+            } else {
+                datasetFile = exist;
+            }
+        }
+        return datasetFile;
+    }
+
+    /**
+     * Get file details
+     */
+    @Transactional(readOnly = true)
+    public DatasetFile getDatasetFile(String datasetId, String fileId) {
+        DatasetFile file = datasetFileRepository.getById(fileId);
+        if (file == null) {
+            throw new IllegalArgumentException("File not found: " + fileId);
+        }
+        if (!file.getDatasetId().equals(datasetId)) {
+            throw new IllegalArgumentException("File does not belong to the specified dataset");
+        }
+        return file;
+    }
+
+    /**
+     * Delete a file
+     */
+    @Transactional
+    public void deleteDatasetFile(String datasetId, String fileId) {
+        DatasetFile file = getDatasetFile(datasetId, fileId);
+        Dataset dataset = datasetRepository.getById(datasetId);
+        dataset.setFiles(new
ArrayList<>(Collections.singleton(file))); + datasetFileRepository.removeById(fileId); + dataset.removeFile(file); + datasetRepository.updateById(dataset); + // 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录 + if (file.getFilePath().startsWith(dataset.getPath())) { + try { + Path filePath = Paths.get(file.getFilePath()); + Files.deleteIfExists(filePath); + } catch (IOException ex) { + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + } + + /** + * 下载文件 + */ + @Transactional(readOnly = true) + public Resource downloadFile(String datasetId, String fileId) { + DatasetFile file = getDatasetFile(datasetId, fileId); + try { + Path filePath = Paths.get(file.getFilePath()).normalize(); + Resource resource = new UrlResource(filePath.toUri()); + if (resource.exists()) { + return resource; + } else { + throw new RuntimeException("File not found: " + file.getFileName()); + } + } catch (MalformedURLException ex) { + throw new RuntimeException("File not found: " + file.getFileName(), ex); + } + } + + /** + * 下载文件 + */ + @Transactional(readOnly = true) + public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) { + Dataset dataset = datasetRepository.getById(datasetId); + if (Objects.isNull(dataset)) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + List allByDatasetId = datasetFileRepository.findAllByDatasetId(datasetId); + Set filePaths = allByDatasetId.stream().map(DatasetFile::getFilePath).collect(Collectors.toSet()); + String datasetPath = dataset.getPath(); + Path downloadPath = Path.of(datasetPath); + response.setContentType("application/zip"); + String zipName = String.format("dataset_%s.zip", + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss"))); + response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + zipName); + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(response.getOutputStream())) { + try (Stream pathStream = Files.walk(downloadPath)) { + List allPaths = pathStream.filter(path -> path.toString().startsWith(datasetPath)) + .filter(path -> filePaths.stream().anyMatch(filePath -> filePath.startsWith(path.toString()))) + .toList(); + for (Path path : allPaths) { + addToZipFile(path, downloadPath, zos); + } + } + } catch (IOException e) { + log.error("Failed to download files in batches.", e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + + private void addToZipFile(Path path, Path basePath, ZipArchiveOutputStream zos) throws IOException { + String entryName = basePath.relativize(path) + .toString() + .replace(File.separator, "/"); + + // 处理目录 + if (Files.isDirectory(path)) { + if (!entryName.isEmpty()) { + entryName += "/"; + ZipArchiveEntry dirEntry = new ZipArchiveEntry(entryName); + zos.putArchiveEntry(dirEntry); + zos.closeArchiveEntry(); + } + } else { + // 处理文件 + ZipArchiveEntry fileEntry = new ZipArchiveEntry(path.toFile(), entryName); + + // 设置更多属性 + BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class); + fileEntry.setSize(attrs.size()); + fileEntry.setLastModifiedTime(attrs.lastModifiedTime()); + + zos.putArchiveEntry(fileEntry); + + try (InputStream is = Files.newInputStream(path)) { + IOUtils.copy(is, zos); + } + zos.closeArchiveEntry(); + } + } + + /** + * 预上传 + * + * @param chunkUploadRequest 上传请求 + * @param datasetId 数据集id + * @return 请求id + */ + @Transactional + public String preUpload(UploadFilesPreRequest chunkUploadRequest, String datasetId) { + if 
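On the download side, the ZIP endpoint writes `Content-Disposition: attachment; filename=` without quotes, and `downloadDirectory` below quotes the name but embeds it raw, so a non-ASCII dataset or directory name can produce a header some clients mis-parse. Spring's `ContentDisposition` builder emits both the quoted `filename` and the RFC 5987 `filename*` form; a sketch:

```java
import java.nio.charset.StandardCharsets;

import jakarta.servlet.http.HttpServletResponse;
import org.springframework.http.ContentDisposition;
import org.springframework.http.HttpHeaders;

final class Downloads {
    /** Sets an attachment header that survives non-ASCII file names. */
    static void setAttachmentHeader(HttpServletResponse response, String fileName) {
        String value = ContentDisposition.attachment()
                .filename(fileName, StandardCharsets.UTF_8) // quoted + filename*=UTF-8''...
                .build()
                .toString();
        response.setHeader(HttpHeaders.CONTENT_DISPOSITION, value);
    }
}
```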
(Objects.isNull(datasetRepository.getById(datasetId))) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + // 构建上传路径,如果有 prefix 则追加到路径中 + String prefix = Optional.ofNullable(chunkUploadRequest.getPrefix()).orElse("").trim(); + prefix = prefix.replace("\\", "/"); + while (prefix.startsWith("/")) { + prefix = prefix.substring(1); + } + + String uploadPath = datasetBasePath + File.separator + datasetId; + if (!prefix.isEmpty()) { + uploadPath = uploadPath + File.separator + prefix.replace("/", File.separator); + } + + ChunkUploadPreRequest request = ChunkUploadPreRequest.builder().build(); + request.setUploadPath(uploadPath); + request.setTotalFileNum(chunkUploadRequest.getTotalFileNum()); + request.setServiceId(DatasetConstant.SERVICE_ID); + DatasetFileUploadCheckInfo checkInfo = new DatasetFileUploadCheckInfo(); + checkInfo.setDatasetId(datasetId); + checkInfo.setHasArchive(chunkUploadRequest.isHasArchive()); + checkInfo.setPrefix(prefix); + try { + ObjectMapper objectMapper = new ObjectMapper(); + String checkInfoJson = objectMapper.writeValueAsString(checkInfo); + request.setCheckInfo(checkInfoJson); + } catch (JsonProcessingException e) { + log.warn("Failed to serialize checkInfo to JSON", e); + } + return fileService.preUpload(request); + } + + /** + * 切片上传 + * + * @param uploadFileRequest 上传请求 + */ + @Transactional + public void chunkUpload(String datasetId, UploadFileRequest uploadFileRequest) { + FileUploadResult uploadResult = fileService.chunkUpload(DatasetConverter.INSTANCE.toChunkUploadRequest(uploadFileRequest)); + saveFileInfoToDb(uploadResult, datasetId); + } + + private void saveFileInfoToDb(FileUploadResult fileUploadResult, String datasetId) { + if (Objects.isNull(fileUploadResult.getSavedFile())) { + // 文件切片上传没有完成 + return; + } + DatasetFileUploadCheckInfo checkInfo; + try { + ObjectMapper objectMapper = new ObjectMapper(); + checkInfo = objectMapper.readValue(fileUploadResult.getCheckInfo(), DatasetFileUploadCheckInfo.class); + if (!Objects.equals(checkInfo.getDatasetId(), datasetId)) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + } catch (IllegalArgumentException | JsonProcessingException e) { + log.warn("Failed to convert checkInfo to DatasetFileUploadCheckInfo", e); + throw BusinessException.of(CommonErrorCode.PRE_UPLOAD_REQUEST_NOT_EXIST); + } + List files; + if (checkInfo.isHasArchive() && AnalyzerUtils.isPackage(fileUploadResult.getSavedFile().getPath())) { + files = ArchiveAnalyzer.process(fileUploadResult); + } else { + files = Collections.singletonList(fileUploadResult); + } + addFileToDataset(datasetId, files); + } + + private void addFileToDataset(String datasetId, List unpacked) { + Dataset dataset = datasetRepository.getById(datasetId); + dataset.setFiles(datasetFileRepository.findAllByDatasetId(datasetId)); + for (FileUploadResult file : unpacked) { + File savedFile = file.getSavedFile(); + LocalDateTime currentTime = LocalDateTime.now(); + DatasetFile datasetFile = DatasetFile.builder() + .id(UUID.randomUUID().toString()) + .datasetId(datasetId) + .fileSize(savedFile.length()) + .uploadTime(currentTime) + .lastAccessTime(currentTime) + .fileName(file.getFileName()) + .filePath(savedFile.getPath()) + .fileType(AnalyzerUtils.getExtension(file.getFileName())) + .build(); + setDatasetFileId(datasetFile, dataset); + datasetFileRepository.saveOrUpdate(datasetFile); + dataset.addFile(datasetFile); + } + dataset.active(); + datasetRepository.updateById(dataset); + } + + /** + * 在数据集下创建子目录 + */ + 
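`preUpload` smuggles the dataset context through the generic `FileService` as an opaque JSON string, and `saveFileInfoToDb` parses it back when the last chunk arrives. A self-contained round-trip of that contract, using the `DatasetFileUploadCheckInfo` fields introduced in this change:

```java
import com.fasterxml.jackson.databind.ObjectMapper;

public class CheckInfoRoundTrip {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();

        // What preUpload attaches to the ChunkUploadPreRequest.
        DatasetFileUploadCheckInfo out = new DatasetFileUploadCheckInfo();
        out.setDatasetId("ds-123");
        out.setHasArchive(true);
        out.setPrefix("images/");
        String json = mapper.writeValueAsString(out);
        System.out.println(json);
        // {"datasetId":"ds-123","hasArchive":true,"prefix":"images/"}

        // What saveFileInfoToDb recovers before validating the dataset id.
        DatasetFileUploadCheckInfo in =
                mapper.readValue(json, DatasetFileUploadCheckInfo.class);
        System.out.println("ds-123".equals(in.getDatasetId())); // true
    }
}
```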
@Transactional + public void createDirectory(String datasetId, CreateDirectoryRequest req) { + Dataset dataset = datasetRepository.getById(datasetId); + if (dataset == null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + String datasetPath = dataset.getPath(); + String parentPrefix = Optional.ofNullable(req.getParentPrefix()).orElse("").trim(); + parentPrefix = parentPrefix.replace("\\", "/"); + while (parentPrefix.startsWith("/")) { + parentPrefix = parentPrefix.substring(1); + } + + String directoryName = Optional.ofNullable(req.getDirectoryName()).orElse("").trim(); + if (directoryName.isEmpty()) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + if (directoryName.contains("..") || directoryName.contains("/") || directoryName.contains("\\")) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + Path basePath = Paths.get(datasetPath); + Path targetPath = parentPrefix.isEmpty() + ? basePath.resolve(directoryName) + : basePath.resolve(parentPrefix).resolve(directoryName); + + Path normalized = targetPath.normalize(); + if (!normalized.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + try { + Files.createDirectories(normalized); + } catch (IOException e) { + log.error("Failed to create directory {} for dataset {}", normalized, datasetId, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + + /** + * 下载目录为 ZIP 文件 + */ + @Transactional(readOnly = true) + public void downloadDirectory(String datasetId, String prefix, HttpServletResponse response) { + Dataset dataset = datasetRepository.getById(datasetId); + if (dataset == null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + String datasetPath = dataset.getPath(); + prefix = Optional.ofNullable(prefix).orElse("").trim(); + prefix = prefix.replace("\\", "/"); + while (prefix.startsWith("/")) { + prefix = prefix.substring(1); + } + while (prefix.endsWith("/")) { + prefix = prefix.substring(0, prefix.length() - 1); + } + + Path basePath = Paths.get(datasetPath); + Path targetPath = prefix.isEmpty() ? basePath : basePath.resolve(prefix); + Path normalized = targetPath.normalize(); + + if (!normalized.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + if (!Files.exists(normalized) || !Files.isDirectory(normalized)) { + throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); + } + + String zipFileName = prefix.isEmpty() ? 
dataset.getName() : prefix.replace("/", "_"); + zipFileName = zipFileName + "_" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")) + ".zip"; + + try { + response.setContentType("application/zip"); + response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + zipFileName + "\""); + + try (ZipArchiveOutputStream zipOut = new ZipArchiveOutputStream(response.getOutputStream())) { + zipDirectory(normalized, normalized, zipOut); + zipOut.finish(); + } + } catch (IOException e) { + log.error("Failed to download directory {} for dataset {}", normalized, datasetId, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + + /** + * 递归压缩目录 + */ + private void zipDirectory(Path sourceDir, Path basePath, ZipArchiveOutputStream zipOut) throws IOException { + try (Stream paths = Files.walk(sourceDir)) { + paths.filter(path -> !Files.isDirectory(path)) + .forEach(path -> { + try { + Path relativePath = basePath.relativize(path); + ZipArchiveEntry zipEntry = new ZipArchiveEntry(relativePath.toString()); + zipOut.putArchiveEntry(zipEntry); + try (InputStream fis = Files.newInputStream(path)) { + IOUtils.copy(fis, zipOut); + } + zipOut.closeArchiveEntry(); + } catch (IOException e) { + log.error("Failed to add file to zip: {}", path, e); + } + }); + } + } + + /** + * 删除目录及其所有内容 + */ + @Transactional + public void deleteDirectory(String datasetId, String prefix) { + Dataset dataset = datasetRepository.getById(datasetId); + if (dataset == null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + prefix = Optional.ofNullable(prefix).orElse("").trim(); + prefix = prefix.replace("\\", "/"); + while (prefix.startsWith("/")) { + prefix = prefix.substring(1); + } + while (prefix.endsWith("/")) { + prefix = prefix.substring(0, prefix.length() - 1); + } + + if (prefix.isEmpty()) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + String datasetPath = dataset.getPath(); + Path basePath = Paths.get(datasetPath); + Path targetPath = basePath.resolve(prefix); + Path normalized = targetPath.normalize(); + + if (!normalized.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + if (!Files.exists(normalized) || !Files.isDirectory(normalized)) { + throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); + } + + // 删除数据库中该目录下的所有文件记录(基于数据集内相对路径判断) + String datasetPathNorm = datasetPath.replace("\\", "/"); + String logicalPrefix = prefix; // 已经去掉首尾斜杠 + List filesToDelete = datasetFileRepository.findAllByDatasetId(datasetId).stream() + .filter(file -> { + if (file.getFilePath() == null) { + return false; + } + String filePath = file.getFilePath().replace("\\", "/"); + if (!filePath.startsWith(datasetPathNorm)) { + return false; + } + String relative = filePath.substring(datasetPathNorm.length()); + while (relative.startsWith("/")) { + relative = relative.substring(1); + } + return relative.equals(logicalPrefix) || relative.startsWith(logicalPrefix + "/"); + }) + .collect(Collectors.toList()); + + for (DatasetFile file : filesToDelete) { + datasetFileRepository.removeById(file.getId()); + } + + // 删除文件系统中的目录 + try { + deleteDirectoryRecursively(normalized); + } catch (IOException e) { + log.error("Failed to delete directory {} for dataset {}", normalized, datasetId, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + + // 更新数据集 + dataset.setFiles(filesToDelete); + for (DatasetFile file : filesToDelete) { + dataset.removeFile(file); + } + 
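`createDirectory`, `downloadDirectory` and `deleteDirectory` each repeat the same normalize-then-`startsWith` containment check. Extracted as a helper it is easier to keep consistent; note the base itself is worth normalizing too, since it comes straight from `dataset.getPath()`. A sketch, with `IllegalArgumentException` standing in for `CommonErrorCode.PARAM_ERROR`; the read path in `getDatasetFilesWithDirectory` builds its listing path without this guard and may deserve the same treatment:

```java
import java.nio.file.Path;
import java.nio.file.Paths;

public final class PathGuard {
    private PathGuard() {
    }

    /** Resolves userPath against base, rejecting anything that escapes it. */
    public static Path resolveWithinBase(Path base, String userPath) {
        Path normalizedBase = base.toAbsolutePath().normalize();
        Path resolved = normalizedBase.resolve(userPath).normalize();
        if (!resolved.startsWith(normalizedBase)) {
            throw new IllegalArgumentException("path escapes base: " + userPath);
        }
        return resolved;
    }

    public static void main(String[] args) {
        Path base = Paths.get("/dataset/ds-123");
        System.out.println(resolveWithinBase(base, "images/cats")); // /dataset/ds-123/images/cats
        try {
            resolveWithinBase(base, "../other-dataset");
        } catch (IllegalArgumentException e) {
            System.out.println("rejected: " + e.getMessage());
        }
    }
}
```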
datasetRepository.updateById(dataset); + } + + /** + * 递归删除目录 + */ + private void deleteDirectoryRecursively(Path directory) throws IOException { + try (Stream paths = Files.walk(directory)) { + paths.sorted(Comparator.reverseOrder()) + .forEach(path -> { + try { + Files.delete(path); + } catch (IOException e) { + log.error("Failed to delete: {}", path, e); + } + }); + } + } + + /** + * 为数据集文件设置文件id + * + * @param datasetFile 要设置id的文件 + * @param dataset 数据集(包含文件列表) + */ + private void setDatasetFileId(DatasetFile datasetFile, Dataset dataset) { + Map existDatasetFilMap = dataset.getFiles().stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity())); + DatasetFile existDatasetFile = existDatasetFilMap.get(datasetFile.getFilePath()); + if (Objects.isNull(existDatasetFile)) { + return; + } + if (duplicateMethod == DuplicateMethod.ERROR) { + log.error("file {} already exists in dataset {}", datasetFile.getFileName(), datasetFile.getDatasetId()); + throw BusinessException.of(DataManagementErrorCode.DATASET_FILE_ALREADY_EXISTS); + } + if (duplicateMethod == DuplicateMethod.COVER) { + dataset.removeFile(existDatasetFile); + datasetFile.setId(existDatasetFile.getId()); + } + } + + /** + * 复制文件到数据集目录 + * + * @param datasetId 数据集id + * @param req 复制文件请求 + * @return 复制的文件列表 + */ + @Transactional + public List copyFilesToDatasetDir(String datasetId, CopyFilesRequest req) { + Dataset dataset = datasetRepository.getById(datasetId); + BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND); + List copiedFiles = new ArrayList<>(); + List existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId); + dataset.setFiles(existDatasetFiles); + for (String sourceFilePath : req.sourcePaths()) { + Path sourcePath = Paths.get(sourceFilePath); + if (!Files.exists(sourcePath) || !Files.isRegularFile(sourcePath)) { + log.warn("Source file does not exist or is not a regular file: {}", sourceFilePath); + continue; + } + String fileName = sourcePath.getFileName().toString(); + File sourceFile = sourcePath.toFile(); + LocalDateTime currentTime = LocalDateTime.now(); + DatasetFile datasetFile = DatasetFile.builder() + .id(UUID.randomUUID().toString()) + .datasetId(datasetId) + .fileName(fileName) + .fileType(AnalyzerUtils.getExtension(fileName)) + .fileSize(sourceFile.length()) + .filePath(Paths.get(dataset.getPath(), fileName).toString()) + .uploadTime(currentTime) + .lastAccessTime(currentTime) + .build(); + setDatasetFileId(datasetFile, dataset); + dataset.addFile(datasetFile); + copiedFiles.add(datasetFile); + } + datasetFileRepository.saveOrUpdateBatch(copiedFiles, 100); + dataset.active(); + datasetRepository.updateById(dataset); + CompletableFuture.runAsync(() -> copyFilesToDatasetDir(req.sourcePaths(), dataset)); + return copiedFiles; + } + + private void copyFilesToDatasetDir(List sourcePaths, Dataset dataset) { + for (String sourcePath : sourcePaths) { + Path sourceFilePath = Paths.get(sourcePath); + Path targetFilePath = Paths.get(dataset.getPath(), sourceFilePath.getFileName().toString()); + try { + Files.createDirectories(Path.of(dataset.getPath())); + Files.copy(sourceFilePath, targetFilePath); + } catch (IOException e) { + log.error("Failed to copy file from {} to {}", sourcePath, targetFilePath, e); + } + } + } + + /** + * 添加文件到数据集(仅创建数据库记录,不执行文件系统操作) + * + * @param datasetId 数据集id + * @param req 添加文件请求 + * @return 添加的文件列表 + */ + @Transactional + public List addFilesToDataset(String datasetId, AddFilesRequest req) { + Dataset dataset = 
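The `DuplicateMethod` type behind `datamate.data-management.file.duplicate` is not part of this diff; judging from the two branches in `setDatasetFileId`, it is presumably a small enum along these lines (hypothetical sketch, the real type may carry more values):

```java
/**
 * Hypothetical sketch of the duplicate-file policy; not the actual source.
 * Spring converts the String property value to the enum constant for @Value.
 */
public enum DuplicateMethod {
    /** Reuse the existing record's id, i.e. overwrite it. */
    COVER,
    /** Fail the upload with DATASET_FILE_ALREADY_EXISTS. */
    ERROR
}
```

Separately, the async copy helper calls `Files.copy(source, target)` with no options, which throws `FileAlreadyExistsException` on a re-copy even though, under `COVER`, `setDatasetFileId` has already overwritten the database record; the exception is only logged, so disk and DB can quietly diverge. A hedged variant that keeps them in step:

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

final class DatasetCopy {
    /** Copies source into the dataset directory, overwriting an earlier copy. */
    static void copyIntoDataset(Path source, Path datasetDir) throws IOException {
        Files.createDirectories(datasetDir);
        Path target = datasetDir.resolve(source.getFileName());
        // REPLACE_EXISTING mirrors the COVER policy; without it a second copy
        // of the same file name fails even though the DB row was reused.
        Files.copy(source, target, StandardCopyOption.REPLACE_EXISTING);
    }
}
```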
datasetRepository.getById(datasetId); + BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND); + List addedFiles = new ArrayList<>(); + List existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId); + dataset.setFiles(existDatasetFiles); + + boolean softAdd = req.softAdd(); + String metadata; + try { + Map metadataMap = Map.of("softAdd", softAdd); + ObjectMapper objectMapper = new ObjectMapper(); + metadata = objectMapper.writeValueAsString(metadataMap); + } catch (JsonProcessingException e) { + log.error("Failed to serialize metadataMap", e); + throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR); + } + + for (String sourceFilePath : req.sourcePaths()) { + Path sourcePath = Paths.get(sourceFilePath); + String fileName = sourcePath.getFileName().toString(); + File sourceFile = sourcePath.toFile(); + LocalDateTime currentTime = LocalDateTime.now(); + + DatasetFile datasetFile = DatasetFile.builder() + .id(UUID.randomUUID().toString()) + .datasetId(datasetId) + .fileName(fileName) + .fileType(AnalyzerUtils.getExtension(fileName)) + .fileSize(sourceFile.length()) + .filePath(sourceFilePath) + .uploadTime(currentTime) + .lastAccessTime(currentTime) + .metadata(metadata) + .build(); + setDatasetFileId(datasetFile, dataset); + dataset.addFile(datasetFile); + addedFiles.add(datasetFile); + } + datasetFileRepository.saveOrUpdateBatch(addedFiles, 100); + dataset.active(); + datasetRepository.updateById(dataset); + // Note: addFilesToDataset only creates DB records, no file system operations + // If file copy is needed, use copyFilesToDatasetDir endpoint instead + return addedFiles; + } +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java index e0df444b..45a58a12 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java @@ -1,54 +1,63 @@ -package com.datamate.datamanagement.domain.model.dataset; - -import com.baomidou.mybatisplus.annotation.TableId; -import com.baomidou.mybatisplus.annotation.TableName; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import lombok.*; -import lombok.extern.slf4j.Slf4j; - -import java.time.LocalDateTime; -import java.util.Collections; -import java.util.List; - -/** - * 数据集文件实体(与数据库表 t_dm_dataset_files 对齐) - */ -@Getter -@Setter -@Builder -@Slf4j -@NoArgsConstructor -@AllArgsConstructor -@TableName("t_dm_dataset_files") -public class DatasetFile { - @TableId - private String id; // UUID - private String datasetId; // UUID - private String fileName; - private String filePath; - private String fileType; // JPG/PNG/DCM/TXT - private Long fileSize; // bytes - private String checkSum; - private String tags; - private String metadata; - private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR - private LocalDateTime uploadTime; - private LocalDateTime lastAccessTime; - private LocalDateTime createdAt; - private LocalDateTime updatedAt; - - /** - * 解析标签 - * - * @return 标签列表 - */ - public List analyzeTag() { - try { - ObjectMapper mapper = new ObjectMapper(); - return mapper.readValue(tags, new TypeReference>() {}); - } catch (Exception e) { - return 
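`addFilesToDataset` stores `{"softAdd": true}` (or `false`) in the `metadata` column as opaque JSON. A consumer would read the flag back roughly like this (a sketch; the column may accumulate other keys over time):

```java
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.util.Map;

final class MetadataReader {
    /** Reads the softAdd flag written by addFilesToDataset; false if absent or unparsable. */
    static boolean isSoftAdded(DatasetFile file) {
        if (file.getMetadata() == null) {
            return false;
        }
        try {
            ObjectMapper mapper = new ObjectMapper();
            Map<String, Object> meta = mapper.readValue(
                    file.getMetadata(), new TypeReference<Map<String, Object>>() { });
            return Boolean.TRUE.equals(meta.get("softAdd"));
        } catch (Exception e) {
            return false;
        }
    }
}
```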
Collections.emptyList();
-        }
-    }
-}
+package com.datamate.datamanagement.domain.model.dataset;
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import lombok.*;
+import lombok.extern.slf4j.Slf4j;
+
+import java.time.LocalDateTime;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * 数据集文件实体(与数据库表 t_dm_dataset_files 对齐)
+ */
+@Getter
+@Setter
+@Builder
+@Slf4j
+@NoArgsConstructor
+@AllArgsConstructor
+@TableName("t_dm_dataset_files")
+public class DatasetFile {
+    @TableId
+    private String id; // UUID
+    private String datasetId; // UUID
+    private String fileName;
+    private String filePath;
+    private String fileType; // JPG/PNG/DCM/TXT
+    private Long fileSize; // bytes
+    private String checkSum;
+    private String tags;
+    private String metadata;
+    private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
+    private LocalDateTime uploadTime;
+    private LocalDateTime lastAccessTime;
+    private LocalDateTime createdAt;
+    private LocalDateTime updatedAt;
+
+    /** 标记是否为目录(非持久化字段) */
+    @TableField(exist = false)
+    private Boolean directory;
+
+    /** 目录包含的文件数量(非持久化字段) */
+    @TableField(exist = false)
+    private Long fileCount;
+
+    /**
+     * 解析标签
+     *
+     * @return 标签列表
+     */
+    public List<String> analyzeTag() {
+        try {
+            ObjectMapper mapper = new ObjectMapper();
+            return mapper.readValue(tags, new TypeReference<List<String>>() {});
+        } catch (Exception e) {
+            return Collections.emptyList();
+        }
+    }
+}
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFileUploadCheckInfo.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFileUploadCheckInfo.java
index b833f92c..edd52947 100644
--- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFileUploadCheckInfo.java
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFileUploadCheckInfo.java
@@ -1,21 +1,24 @@
-package com.datamate.datamanagement.domain.model.dataset;
-
-import lombok.AllArgsConstructor;
-import lombok.Getter;
-import lombok.NoArgsConstructor;
-import lombok.Setter;
-
-/**
- * 数据集文件上传检查信息
- */
-@Getter
-@Setter
-@NoArgsConstructor
-@AllArgsConstructor
-public class DatasetFileUploadCheckInfo {
-    /** 数据集id */
-    private String datasetId;
-
-    /** 是否为压缩包上传 */
-    private boolean hasArchive;
-}
+package com.datamate.datamanagement.domain.model.dataset;
+
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+import lombok.Setter;
+
+/**
+ * 数据集文件上传检查信息
+ */
+@Getter
+@Setter
+@NoArgsConstructor
+@AllArgsConstructor
+public class DatasetFileUploadCheckInfo {
+    /** 数据集id */
+    private String datasetId;
+
+    /** 是否为压缩包上传 */
+    private boolean hasArchive;
+
+    /** 目标子目录前缀,例如 "images/",为空表示数据集根目录 */
+    private String prefix;
+}
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java
index 683f12b7..c1d941e8 100644
---
a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java @@ -1,43 +1,47 @@ -package com.datamate.datamanagement.infrastructure.exception; - -import com.datamate.common.infrastructure.exception.ErrorCode; -import lombok.AllArgsConstructor; -import lombok.Getter; - -/** - * 数据管理模块错误码 - * - * @author dallas - * @since 2025-10-20 - */ -@Getter -@AllArgsConstructor -public enum DataManagementErrorCode implements ErrorCode { - /** - * 数据集不存在 - */ - DATASET_NOT_FOUND("data_management.0001", "数据集不存在"), - /** - * 数据集已存在 - */ - DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"), - /** - * 数据集状态错误 - */ - DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"), - /** - * 数据集标签不存在 - */ - DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"), - /** - * 数据集标签已存在 - */ - DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在"), - /** - * 数据集标签已存在 - */ - DATASET_FILE_ALREADY_EXISTS("data_management.0006", "数据集文件已存在"); - - private final String code; - private final String message; -} +package com.datamate.datamanagement.infrastructure.exception; + +import com.datamate.common.infrastructure.exception.ErrorCode; +import lombok.AllArgsConstructor; +import lombok.Getter; + +/** + * 数据管理模块错误码 + * + * @author dallas + * @since 2025-10-20 + */ +@Getter +@AllArgsConstructor +public enum DataManagementErrorCode implements ErrorCode { + /** + * 数据集不存在 + */ + DATASET_NOT_FOUND("data_management.0001", "数据集不存在"), + /** + * 数据集已存在 + */ + DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"), + /** + * 数据集状态错误 + */ + DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"), + /** + * 数据集标签不存在 + */ + DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"), + /** + * 数据集标签已存在 + */ + DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在"), + /** + * 数据集文件已存在 + */ + DATASET_FILE_ALREADY_EXISTS("data_management.0006", "数据集文件已存在"), + /** + * 目录不存在 + */ + DIRECTORY_NOT_FOUND("data_management.0007", "目录不存在"); + + private final String code; + private final String message; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java new file mode 100644 index 00000000..441cc74a --- /dev/null +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java @@ -0,0 +1,20 @@ +package com.datamate.datamanagement.interfaces.dto; + +import jakarta.validation.constraints.NotBlank; +import lombok.Getter; +import lombok.Setter; + +/** + * 创建数据集子目录请求 + */ +@Getter +@Setter +public class CreateDirectoryRequest { + + /** 父级前缀路径,例如 "images/",为空表示数据集根目录 */ + private String parentPrefix; + + /** 新建目录名称 */ + @NotBlank + private String directoryName; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetFileResponse.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetFileResponse.java index ec06ff49..02055713 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetFileResponse.java +++ 
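`CreateDirectoryRequest` only enforces `@NotBlank` on the name at the binding layer; the `..` and separator checks live in the service. A quick sketch of what Bean Validation alone accepts and what the service then rejects (assumes a validator implementation such as Hibernate Validator on the classpath):

```java
import jakarta.validation.Validation;
import jakarta.validation.Validator;

final class CreateDirectoryRequestDemo {
    public static void main(String[] args) {
        Validator validator = Validation.buildDefaultValidatorFactory().getValidator();

        CreateDirectoryRequest req = new CreateDirectoryRequest();
        req.setParentPrefix("images/");
        req.setDirectoryName("../escape");

        // Passes @NotBlank, so binding succeeds ...
        System.out.println(validator.validate(req).isEmpty()); // true
        // ... but createDirectory then rejects it with CommonErrorCode.PARAM_ERROR
        // because the name contains "..".
    }
}
```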
b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetFileResponse.java @@ -1,36 +1,40 @@ -package com.datamate.datamanagement.interfaces.dto; - -import lombok.Getter; -import lombok.Setter; - -import java.time.LocalDateTime; - -/** - * 数据集文件响应DTO - */ -@Getter -@Setter -public class DatasetFileResponse { - /** 文件ID */ - private String id; - /** 文件名 */ - private String fileName; - /** 原始文件名 */ - private String originalName; - /** 文件类型 */ - private String fileType; - /** 文件大小(字节) */ - private Long fileSize; - /** 文件状态 */ - private String status; - /** 文件描述 */ - private String description; - /** 文件路径 */ - private String filePath; - /** 上传时间 */ - private LocalDateTime uploadTime; - /** 最后更新时间 */ - private LocalDateTime lastAccessTime; - /** 上传者 */ - private String uploadedBy; -} +package com.datamate.datamanagement.interfaces.dto; + +import lombok.Getter; +import lombok.Setter; + +import java.time.LocalDateTime; + +/** + * 数据集文件响应DTO + */ +@Getter +@Setter +public class DatasetFileResponse { + /** 文件ID */ + private String id; + /** 文件名 */ + private String fileName; + /** 原始文件名 */ + private String originalName; + /** 文件类型 */ + private String fileType; + /** 文件大小(字节) */ + private Long fileSize; + /** 文件状态 */ + private String status; + /** 文件描述 */ + private String description; + /** 文件路径 */ + private String filePath; + /** 上传时间 */ + private LocalDateTime uploadTime; + /** 最后更新时间 */ + private LocalDateTime lastAccessTime; + /** 上传者 */ + private String uploadedBy; + /** 是否为目录 */ + private Boolean directory; + /** 目录文件数量 */ + private Long fileCount; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java index 1bfcc125..9b7ced05 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java @@ -1,22 +1,25 @@ -package com.datamate.datamanagement.interfaces.dto; - -import jakarta.validation.constraints.Min; -import lombok.Getter; -import lombok.Setter; - -/** - * 切片上传预上传请求 - */ -@Getter -@Setter -public class UploadFilesPreRequest { - /** 是否为压缩包上传 */ - private boolean hasArchive; - - /** 总文件数量 */ - @Min(1) - private int totalFileNum; - - /** 总文件大小 */ - private long totalSize; -} +package com.datamate.datamanagement.interfaces.dto; + +import jakarta.validation.constraints.Min; +import lombok.Getter; +import lombok.Setter; + +/** + * 切片上传预上传请求 + */ +@Getter +@Setter +public class UploadFilesPreRequest { + /** 是否为压缩包上传 */ + private boolean hasArchive; + + /** 总文件数量 */ + @Min(1) + private int totalFileNum; + + /** 总文件大小 */ + private long totalSize; + + /** 目标子目录前缀,例如 "images/",为空表示数据集根目录 */ + private String prefix; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetFileController.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetFileController.java index a3dfecb0..e1b61d5c 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetFileController.java +++ 
b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetFileController.java @@ -1,165 +1,197 @@ -package com.datamate.datamanagement.interfaces.rest; - -import com.datamate.common.infrastructure.common.IgnoreResponseWrap; -import com.datamate.common.infrastructure.common.Response; -import com.datamate.common.infrastructure.exception.SystemErrorCode; -import com.datamate.common.interfaces.PagedResponse; -import com.datamate.common.interfaces.PagingQuery; -import com.datamate.datamanagement.application.DatasetFileApplicationService; -import com.datamate.datamanagement.domain.model.dataset.DatasetFile; -import com.datamate.datamanagement.interfaces.converter.DatasetConverter; -import com.datamate.datamanagement.interfaces.dto.AddFilesRequest; -import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest; -import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse; -import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; -import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; -import jakarta.servlet.http.HttpServletResponse; -import jakarta.validation.Valid; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.core.io.Resource; -import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpStatus; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.*; - -import java.util.List; - -/** - * 数据集文件 REST 控制器(UUID 模式) - */ -@Slf4j -@RestController -@RequestMapping("/data-management/datasets/{datasetId}/files") -public class DatasetFileController { - - private final DatasetFileApplicationService datasetFileApplicationService; - - @Autowired - public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService) { - this.datasetFileApplicationService = datasetFileApplicationService; - } - - @GetMapping - public Response> getDatasetFiles( - @PathVariable("datasetId") String datasetId, - @RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory, - @RequestParam(value = "page", required = false, defaultValue = "0") Integer page, - @RequestParam(value = "size", required = false, defaultValue = "20") Integer size, - @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) { - PagingQuery pagingQuery = new PagingQuery(page, size); - PagedResponse filesPage; - if (isWithDirectory) { - filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery); - } else { - filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, null, null, pagingQuery); - } - return Response.ok(filesPage); - } - - @GetMapping("/{fileId}") - public ResponseEntity> getDatasetFileById( - @PathVariable("datasetId") String datasetId, - @PathVariable("fileId") String fileId) { - try { - DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId); - return ResponseEntity.ok(Response.ok(DatasetConverter.INSTANCE.convertToResponse(datasetFile))); - } catch (IllegalArgumentException e) { - return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null)); - } - } - - @DeleteMapping("/{fileId}") - public ResponseEntity> deleteDatasetFile( - @PathVariable("datasetId") String datasetId, - @PathVariable("fileId") String fileId) { - try { - 
datasetFileApplicationService.deleteDatasetFile(datasetId, fileId); - return ResponseEntity.ok().build(); - } catch (IllegalArgumentException e) { - return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null)); - } - } - - @IgnoreResponseWrap - @GetMapping(value = "/{fileId}/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE + ";charset=UTF-8") - public ResponseEntity downloadDatasetFileById(@PathVariable("datasetId") String datasetId, - @PathVariable("fileId") String fileId) { - try { - DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId); - Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId); - - return ResponseEntity.ok() - .contentType(MediaType.APPLICATION_OCTET_STREAM) - .header(HttpHeaders.CONTENT_DISPOSITION, - "attachment; filename=\"" + datasetFile.getFileName() + "\"") - .body(resource); - } catch (IllegalArgumentException e) { - return ResponseEntity.status(HttpStatus.NOT_FOUND).build(); - } catch (Exception e) { - return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build(); - } - } - - @IgnoreResponseWrap - @GetMapping(value = "/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE) - public void downloadDatasetFileAsZip(@PathVariable("datasetId") String datasetId, HttpServletResponse response) { - datasetFileApplicationService.downloadDatasetFileAsZip(datasetId, response); - } - - /** - * 文件上传请求 - * - * @param request 批量文件上传请求 - * @return 批量上传请求id - */ - @PostMapping("/upload/pre-upload") - public ResponseEntity> preUpload(@PathVariable("datasetId") String datasetId, - @RequestBody @Valid UploadFilesPreRequest request) { - return ResponseEntity.ok(Response.ok(datasetFileApplicationService.preUpload(request, datasetId))); - } - - /** - * 分块上传 - * - * @param uploadFileRequest 上传文件请求 - */ - @PostMapping("/upload/chunk") - public ResponseEntity chunkUpload(@PathVariable("datasetId") String datasetId, - @Valid UploadFileRequest uploadFileRequest) { - log.info("file upload reqId:{}, fileNo:{}, total chunk num:{}, current chunkNo:{}", - uploadFileRequest.getReqId(), uploadFileRequest.getFileNo(), uploadFileRequest.getTotalChunkNum(), - uploadFileRequest.getChunkNo()); - datasetFileApplicationService.chunkUpload(datasetId, uploadFileRequest); - return ResponseEntity.ok().build(); - } - - /** - * 将指定路径中的文件拷贝到数据集目录下 - * - * @param datasetId 数据集ID - * @param req 源文件路径列表 - * @return 数据集文件响应DTO列表 - */ - @PostMapping("/upload/copy") - public List copyFilesToDatasetDir(@PathVariable("datasetId") String datasetId, - @RequestBody @Valid CopyFilesRequest req) { - List datasetFiles = datasetFileApplicationService.copyFilesToDatasetDir(datasetId, req); - return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles); - } - - /** - * 添加文件到数据集(仅创建数据库记录,不执行文件系统操作) - * - * @param datasetId 数据集ID - * @param req 添加文件请求(包含源文件路径列表和softAdd标志) - * @return 数据集文件响应DTO列表 - */ - @PostMapping("/upload/add") - public List addFilesToDataset(@PathVariable("datasetId") String datasetId, - @RequestBody @Valid AddFilesRequest req) { - List datasetFiles = datasetFileApplicationService.addFilesToDataset(datasetId, req); - return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles); - } -} +package com.datamate.datamanagement.interfaces.rest; + +import com.datamate.common.infrastructure.common.IgnoreResponseWrap; +import com.datamate.common.infrastructure.common.Response; +import com.datamate.common.infrastructure.exception.SystemErrorCode; +import 
com.datamate.common.interfaces.PagedResponse; +import com.datamate.common.interfaces.PagingQuery; +import com.datamate.datamanagement.application.DatasetFileApplicationService; +import com.datamate.datamanagement.domain.model.dataset.DatasetFile; +import com.datamate.datamanagement.interfaces.converter.DatasetConverter; +import com.datamate.datamanagement.interfaces.dto.AddFilesRequest; +import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest; +import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest; +import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse; +import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; +import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; +import jakarta.servlet.http.HttpServletResponse; +import jakarta.validation.Valid; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.io.Resource; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.*; + +import java.util.List; + +/** + * 数据集文件 REST 控制器(UUID 模式) + */ +@Slf4j +@RestController +@RequestMapping("/data-management/datasets/{datasetId}/files") +public class DatasetFileController { + + private final DatasetFileApplicationService datasetFileApplicationService; + + @Autowired + public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService) { + this.datasetFileApplicationService = datasetFileApplicationService; + } + + @GetMapping + public Response> getDatasetFiles( + @PathVariable("datasetId") String datasetId, + @RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory, + @RequestParam(value = "page", required = false, defaultValue = "0") Integer page, + @RequestParam(value = "size", required = false, defaultValue = "20") Integer size, + @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) { + PagingQuery pagingQuery = new PagingQuery(page, size); + PagedResponse filesPage; + if (isWithDirectory) { + filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery); + } else { + filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, null, null, pagingQuery); + } + return Response.ok(filesPage); + } + + @GetMapping("/{fileId}") + public ResponseEntity> getDatasetFileById( + @PathVariable("datasetId") String datasetId, + @PathVariable("fileId") String fileId) { + try { + DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId); + return ResponseEntity.ok(Response.ok(DatasetConverter.INSTANCE.convertToResponse(datasetFile))); + } catch (IllegalArgumentException e) { + return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null)); + } + } + + @DeleteMapping("/{fileId}") + public ResponseEntity> deleteDatasetFile( + @PathVariable("datasetId") String datasetId, + @PathVariable("fileId") String fileId) { + try { + datasetFileApplicationService.deleteDatasetFile(datasetId, fileId); + return ResponseEntity.ok().build(); + } catch (IllegalArgumentException e) { + return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null)); + } + } + + @IgnoreResponseWrap + @GetMapping(value = "/{fileId}/download", produces = 
MediaType.APPLICATION_OCTET_STREAM_VALUE + ";charset=UTF-8") + public ResponseEntity downloadDatasetFileById(@PathVariable("datasetId") String datasetId, + @PathVariable("fileId") String fileId) { + try { + DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId); + Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId); + + return ResponseEntity.ok() + .contentType(MediaType.APPLICATION_OCTET_STREAM) + .header(HttpHeaders.CONTENT_DISPOSITION, + "attachment; filename=\"" + datasetFile.getFileName() + "\"") + .body(resource); + } catch (IllegalArgumentException e) { + return ResponseEntity.status(HttpStatus.NOT_FOUND).build(); + } catch (Exception e) { + return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build(); + } + } + + @IgnoreResponseWrap + @GetMapping(value = "/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE) + public void downloadDatasetFileAsZip(@PathVariable("datasetId") String datasetId, HttpServletResponse response) { + datasetFileApplicationService.downloadDatasetFileAsZip(datasetId, response); + } + + /** + * 文件上传请求 + * + * @param request 批量文件上传请求 + * @return 批量上传请求id + */ + @PostMapping("/upload/pre-upload") + public ResponseEntity> preUpload(@PathVariable("datasetId") String datasetId, + @RequestBody @Valid UploadFilesPreRequest request) { + return ResponseEntity.ok(Response.ok(datasetFileApplicationService.preUpload(request, datasetId))); + } + + /** + * 分块上传 + * + * @param uploadFileRequest 上传文件请求 + */ + @PostMapping("/upload/chunk") + public ResponseEntity chunkUpload(@PathVariable("datasetId") String datasetId, + @Valid UploadFileRequest uploadFileRequest) { + log.info("file upload reqId:{}, fileNo:{}, total chunk num:{}, current chunkNo:{}", + uploadFileRequest.getReqId(), uploadFileRequest.getFileNo(), uploadFileRequest.getTotalChunkNum(), + uploadFileRequest.getChunkNo()); + datasetFileApplicationService.chunkUpload(datasetId, uploadFileRequest); + return ResponseEntity.ok().build(); + } + + /** + * 将指定路径中的文件拷贝到数据集目录下 + * + * @param datasetId 数据集ID + * @param req 源文件路径列表 + * @return 数据集文件响应DTO列表 + */ + @PostMapping("/upload/copy") + public List copyFilesToDatasetDir(@PathVariable("datasetId") String datasetId, + @RequestBody @Valid CopyFilesRequest req) { + List datasetFiles = datasetFileApplicationService.copyFilesToDatasetDir(datasetId, req); + return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles); + } + + /** + * 添加文件到数据集(仅创建数据库记录,不执行文件系统操作) + * + * @param datasetId 数据集ID + * @param req 添加文件请求(包含源文件路径列表和softAdd标志) + * @return 数据集文件响应DTO列表 + */ + @PostMapping("/upload/add") + public List addFilesToDataset(@PathVariable("datasetId") String datasetId, + @RequestBody @Valid AddFilesRequest req) { + List datasetFiles = datasetFileApplicationService.addFilesToDataset(datasetId, req); + return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles); + } + + /** + * 在数据集下创建子目录 + */ + @PostMapping("/directories") + public ResponseEntity createDirectory(@PathVariable("datasetId") String datasetId, + @RequestBody @Valid CreateDirectoryRequest req) { + datasetFileApplicationService.createDirectory(datasetId, req); + return ResponseEntity.ok().build(); + } + + /** + * 下载目录(压缩为 ZIP) + */ + @IgnoreResponseWrap + @GetMapping(value = "/directories/download", produces = "application/zip") + public void downloadDirectory(@PathVariable("datasetId") String datasetId, + @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix, + HttpServletResponse 
response) { + datasetFileApplicationService.downloadDirectory(datasetId, prefix, response); + } + + /** + * 删除目录及其所有内容 + */ + @DeleteMapping("/directories") + public ResponseEntity deleteDirectory(@PathVariable("datasetId") String datasetId, + @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) { + datasetFileApplicationService.deleteDirectory(datasetId, prefix); + return ResponseEntity.ok().build(); + } +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/CommonErrorCode.java b/backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/CommonErrorCode.java index 0e1d6a3e..0e5e9842 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/CommonErrorCode.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/CommonErrorCode.java @@ -1,17 +1,18 @@ -package com.datamate.common.infrastructure.exception; - -import lombok.AllArgsConstructor; -import lombok.Getter; - -/** - * CommonErrorCode - * - * @since 2025/12/5 - */ -@Getter -@AllArgsConstructor -public enum CommonErrorCode implements ErrorCode{ - PRE_UPLOAD_REQUEST_NOT_EXIST("common.0101", "预上传请求不存在"); - private final String code; - private final String message; -} +package com.datamate.common.infrastructure.exception; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +/** + * CommonErrorCode + * + * @since 2025/12/5 + */ +@Getter +@AllArgsConstructor +public enum CommonErrorCode implements ErrorCode{ + PARAM_ERROR("common.0001", "参数错误"), + PRE_UPLOAD_REQUEST_NOT_EXIST("common.0101", "预上传请求不存在"); + private final String code; + private final String message; +} diff --git a/frontend/src/components/CardView.tsx b/frontend/src/components/CardView.tsx index 9e044aad..ace44d24 100644 --- a/frontend/src/components/CardView.tsx +++ b/frontend/src/components/CardView.tsx @@ -199,15 +199,11 @@ function CardView(props: CardViewProps) { ? "" : "bg-gradient-to-br from-sky-300 to-blue-500 text-white" }`} - style={{ - ...(item?.iconColor + style={ + item?.iconColor ? { backgroundColor: item.iconColor } - : {}), - backgroundImage: - "linear-gradient(180deg, rgba(255,255,255,0.35), rgba(255,255,255,0.05))", - boxShadow: - "inset 0 0 0 1px rgba(255,255,255,0.25)", - }} + : {} + } >
{item?.icon}
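The new directory operations on `DatasetFileController` line up as POST, GET and DELETE under `.../files/directories`. A hedged MockMvc sketch of the expected wiring (test scaffolding assumed; it is not part of this change):

```java
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.*;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;

import org.springframework.test.web.servlet.MockMvc;

class DirectoryEndpointSketch {
    // Assumes a @WebMvcTest(DatasetFileController.class) context with the
    // application service mocked; all three endpoints answer 200 on success.
    void exerciseDirectoryEndpoints(MockMvc mockMvc) throws Exception {
        String base = "/data-management/datasets/ds-123/files/directories";

        mockMvc.perform(post(base)
                        .contentType("application/json")
                        .content("{\"parentPrefix\":\"images/\",\"directoryName\":\"cats\"}"))
                .andExpect(status().isOk());

        mockMvc.perform(get(base + "/download").param("prefix", "images/cats"))
                .andExpect(status().isOk());

        mockMvc.perform(delete(base).param("prefix", "images/cats"))
                .andExpect(status().isOk());
    }
}
```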
diff --git a/frontend/src/components/business/DatasetFileTransfer.tsx b/frontend/src/components/business/DatasetFileTransfer.tsx index 92284cb1..f942f857 100644 --- a/frontend/src/components/business/DatasetFileTransfer.tsx +++ b/frontend/src/components/business/DatasetFileTransfer.tsx @@ -1,331 +1,406 @@ -import React, { useCallback, useEffect } from "react"; -import { Button, Input, Table } from "antd"; -import { RightOutlined } from "@ant-design/icons"; -import { mapDataset } from "@/pages/DataManagement/dataset.const"; -import { - Dataset, - DatasetFile, - DatasetType, -} from "@/pages/DataManagement/dataset.model"; -import { - queryDatasetFilesUsingGet, - queryDatasetsUsingGet, -} from "@/pages/DataManagement/dataset.api"; -import { formatBytes } from "@/utils/unit"; -import { useDebouncedEffect } from "@/hooks/useDebouncedEffect"; - -interface DatasetFileTransferProps - extends React.HTMLAttributes { - open: boolean; - selectedFilesMap: { [key: string]: DatasetFile }; - onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void; - onDatasetSelect?: (dataset: Dataset | null) => void; -} - -const fileCols = [ - { - title: "所属数据集", - dataIndex: "datasetName", - key: "datasetName", - ellipsis: true, - }, - { - title: "文件名", - dataIndex: "fileName", - key: "fileName", - ellipsis: true, - }, - { - title: "大小", - dataIndex: "fileSize", - key: "fileSize", - ellipsis: true, - render: formatBytes, - }, -]; - -// Customize Table Transfer -const DatasetFileTransfer: React.FC = ({ - open, - selectedFilesMap, - onSelectedFilesChange, - onDatasetSelect, - ...props -}) => { - const [datasets, setDatasets] = React.useState([]); - const [datasetSearch, setDatasetSearch] = React.useState(""); - const [datasetPagination, setDatasetPagination] = React.useState<{ - current: number; - pageSize: number; - total: number; - }>({ current: 1, pageSize: 10, total: 0 }); - - const [files, setFiles] = React.useState([]); - const [filesSearch, setFilesSearch] = React.useState(""); - const [filesPagination, setFilesPagination] = React.useState<{ - current: number; - pageSize: number; - total: number; - }>({ current: 1, pageSize: 10, total: 0 }); - - const [showFiles, setShowFiles] = React.useState(false); - const [selectedDataset, setSelectedDataset] = React.useState( - null - ); - const [datasetSelections, setDatasetSelections] = React.useState( - [] - ); - - const fetchDatasets = async () => { - const { data } = await queryDatasetsUsingGet({ - // Ant Design Table pagination.current is 1-based; ensure backend also receives 1-based value - page: datasetPagination.current, - size: datasetPagination.pageSize, - keyword: datasetSearch, - type: DatasetType.TEXT, - }); - setDatasets(data.content.map(mapDataset) || []); - setDatasetPagination((prev) => ({ - ...prev, - total: data.totalElements, - })); - }; - - useDebouncedEffect( - () => { - fetchDatasets(); - }, - [datasetSearch, datasetPagination.pageSize, datasetPagination.current], - 300 - ); - - const fetchFiles = useCallback( - async ( - options?: Partial<{ page: number; pageSize: number; keyword: string }> - ) => { - if (!selectedDataset) return; - const page = options?.page ?? filesPagination.current; - const pageSize = options?.pageSize ?? filesPagination.pageSize; - const keyword = options?.keyword ?? 
filesSearch; - - const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, { - page, - size: pageSize, - keyword, - }); - setFiles( - (data.content || []).map((item: DatasetFile) => ({ - ...item, - key: item.id, - datasetName: selectedDataset.name, - })) - ); - setFilesPagination((prev) => ({ - ...prev, - current: page, - pageSize, - total: data.totalElements, - })); - }, - [selectedDataset, filesPagination.current, filesPagination.pageSize, filesSearch] - ); - - useEffect(() => { - // 当数据集变化时,重置文件分页并拉取第一页文件,避免额外的循环请求 - if (selectedDataset) { - setFilesPagination({ current: 1, pageSize: 10, total: 0 }); - fetchFiles({ page: 1, pageSize: 10 }).catch(() => {}); - } else { - setFiles([]); - setFilesPagination({ current: 1, pageSize: 10, total: 0 }); - } - // 只在 selectedDataset 变化时触发 - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [selectedDataset]); - - useEffect(() => { - onDatasetSelect?.(selectedDataset); - }, [selectedDataset, onDatasetSelect]); - - const toggleSelectFile = (record: DatasetFile) => { - if (!selectedFilesMap[record.id]) { - onSelectedFilesChange({ - ...selectedFilesMap, - [record.id]: record, - }); - } else { - const newSelectedFiles = { ...selectedFilesMap }; - delete newSelectedFiles[record.id]; - onSelectedFilesChange(newSelectedFiles); - } - }; - - useEffect(() => { - if (!open) { - // 重置状态 - setDatasets([]); - setDatasetSearch(""); - setDatasetPagination({ current: 1, pageSize: 10, total: 0 }); - setFiles([]); - setFilesSearch(""); - setFilesPagination({ current: 1, pageSize: 10, total: 0 }); - setShowFiles(false); - setSelectedDataset(null); - setDatasetSelections([]); - onDatasetSelect?.(null); - } - }, [open, onDatasetSelect]); - - const datasetCols = [ - { - title: "数据集名称", - dataIndex: "name", - key: "name", - ellipsis: true, - }, - { - title: "文件数", - dataIndex: "fileCount", - key: "fileCount", - ellipsis: true, - }, - { - title: "大小", - dataIndex: "totalSize", - key: "totalSize", - ellipsis: true, - render: formatBytes, - }, - ]; - - return ( -
-    <div {...props}>
-      <div>
-        <div>选择数据集</div>
-        <Input
-          value={datasetSearch}
-          onChange={(e) => setDatasetSearch(e.target.value)}
-        />
-        <Table
-          rowClassName={(record: Dataset) =>
-            selectedDataset?.id === record.id ? "bg-blue-100" : ""
-          }
-          onRow={(record: Dataset) => ({
-            onClick: () => {
-              setSelectedDataset(record);
-              if (!datasetSelections.find((d) => d.id === record.id)) {
-                setDatasetSelections([...datasetSelections, record]);
-              } else {
-                setDatasetSelections(
-                  datasetSelections.filter((d) => d.id !== record.id)
-                );
-              }
-            },
-          })}
-          dataSource={datasets}
-          columns={datasetCols}
-          pagination={{
-            ...datasetPagination,
-            onChange: (page, pageSize) =>
-              setDatasetPagination({
-                current: page,
-                pageSize: pageSize || datasetPagination.pageSize,
-                total: datasetPagination.total,
-              }),
-          }}
-        />
-      </div>
-
-      <Button icon={<RightOutlined />} onClick={() => setShowFiles(true)} />
-
-      <div>
-        <div>选择文件</div>
-        <Input
-          value={filesSearch}
-          onChange={(e) => setFilesSearch(e.target.value)}
-        />
-        <Table
-          dataSource={files}
-          columns={fileCols}
-          pagination={{
-            ...filesPagination,
-            onChange: (page, pageSize) => {
-              const nextPageSize = pageSize || filesPagination.pageSize;
-              setFilesPagination((prev) => ({
-                ...prev,
-                current: page,
-                pageSize: nextPageSize,
-              }));
-              fetchFiles({ page, pageSize: nextPageSize }).catch(() => {});
-            },
-          }}
-          onRow={(record: DatasetFile) => ({
-            onClick: () => toggleSelectFile(record),
-          })}
-          rowSelection={{
-            type: "checkbox",
-            selectedRowKeys: Object.keys(selectedFilesMap),
-
-            // 单选
-            onSelect: (record: DatasetFile) => {
-              toggleSelectFile(record);
-            },
-
-            // 全选
-            onSelectAll: (selected, selectedRows: DatasetFile[]) => {
-              if (selected) {
-                // ✔ 全选 -> 将 files 列表全部加入 selectedFilesMap
-                const newMap: Record<string, DatasetFile> = { ...selectedFilesMap };
-                selectedRows.forEach((f) => {
-                  newMap[f.id] = f;
-                });
-                onSelectedFilesChange(newMap);
-              } else {
-                // ✘ 取消全选 -> 清空 map
-                const newMap = { ...selectedFilesMap };
-                Object.keys(newMap).forEach((id) => {
-                  if (files.some((f) => String(f.id) === id)) {
-                    // 仅移除当前页对应文件
-                    delete newMap[id];
-                  }
-                });
-                onSelectedFilesChange(newMap);
-              }
-            },
-
-            getCheckboxProps: (record: DatasetFile) => ({
-              name: record.fileName,
-            }),
-          }}
-        />
-      </div>
-    </div>
- - - ); -}; - -export default DatasetFileTransfer; +import React, { useCallback, useEffect } from "react"; +import { Button, Input, Table, message } from "antd"; +import { RightOutlined } from "@ant-design/icons"; +import { mapDataset } from "@/pages/DataManagement/dataset.const"; +import { + Dataset, + DatasetFile, + DatasetType, +} from "@/pages/DataManagement/dataset.model"; +import { + queryDatasetFilesUsingGet, + queryDatasetsUsingGet, +} from "@/pages/DataManagement/dataset.api"; +import { formatBytes } from "@/utils/unit"; +import { useDebouncedEffect } from "@/hooks/useDebouncedEffect"; + +interface DatasetFileTransferProps + extends React.HTMLAttributes { + open: boolean; + selectedFilesMap: { [key: string]: DatasetFile }; + onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void; + onDatasetSelect?: (dataset: Dataset | null) => void; + datasetTypeFilter?: DatasetType; +} + +const fileCols = [ + { + title: "所属数据集", + dataIndex: "datasetName", + key: "datasetName", + ellipsis: true, + }, + { + title: "文件名", + dataIndex: "fileName", + key: "fileName", + ellipsis: true, + }, + { + title: "大小", + dataIndex: "fileSize", + key: "fileSize", + ellipsis: true, + render: formatBytes, + }, +]; + +// Customize Table Transfer +const DatasetFileTransfer: React.FC = ({ + open, + selectedFilesMap, + onSelectedFilesChange, + onDatasetSelect, + datasetTypeFilter = DatasetType.TEXT, + ...props +}) => { + const [datasets, setDatasets] = React.useState([]); + const [datasetSearch, setDatasetSearch] = React.useState(""); + const [datasetPagination, setDatasetPagination] = React.useState<{ + current: number; + pageSize: number; + total: number; + }>({ current: 1, pageSize: 10, total: 0 }); + + const [files, setFiles] = React.useState([]); + const [filesSearch, setFilesSearch] = React.useState(""); + const [filesPagination, setFilesPagination] = React.useState<{ + current: number; + pageSize: number; + total: number; + }>({ current: 1, pageSize: 10, total: 0 }); + + const [showFiles, setShowFiles] = React.useState(false); + const [selectedDataset, setSelectedDataset] = React.useState( + null + ); + const [datasetSelections, setDatasetSelections] = React.useState( + [] + ); + const [selectingAll, setSelectingAll] = React.useState(false); + + const fetchDatasets = async () => { + const { data } = await queryDatasetsUsingGet({ + // Ant Design Table pagination.current is 1-based; ensure backend also receives 1-based value + page: datasetPagination.current, + size: datasetPagination.pageSize, + keyword: datasetSearch, + type: datasetTypeFilter, + }); + setDatasets(data.content.map(mapDataset) || []); + setDatasetPagination((prev) => ({ + ...prev, + total: data.totalElements, + })); + }; + + useDebouncedEffect( + () => { + fetchDatasets(); + }, + [datasetSearch, datasetPagination.pageSize, datasetPagination.current], + 300 + ); + + const fetchFiles = useCallback( + async ( + options?: Partial<{ page: number; pageSize: number; keyword: string }> + ) => { + if (!selectedDataset) return; + const page = options?.page ?? filesPagination.current; + const pageSize = options?.pageSize ?? filesPagination.pageSize; + const keyword = options?.keyword ?? 
+
+  const fetchFiles = useCallback(
+    async (
+      options?: Partial<{ page: number; pageSize: number; keyword: string }>
+    ) => {
+      if (!selectedDataset) return;
+      // fetchFiles works with the backend's 0-based page index; convert from
+      // the table's 1-based `current` on the way in and back on the way out
+      const page = options?.page ?? filesPagination.current - 1;
+      const pageSize = options?.pageSize ?? filesPagination.pageSize;
+      const keyword = options?.keyword ?? filesSearch;
+
+      const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
+        page,
+        size: pageSize,
+        keyword,
+      });
+      setFiles(
+        (data.content || []).map((item: DatasetFile) => ({
+          ...item,
+          id: item.id,
+          key: String(item.id), // use a string key so it matches the type of selectedRowKeys
+          datasetName: selectedDataset.name,
+        }))
+      );
+      setFilesPagination((prev) => ({
+        ...prev,
+        current: page + 1,
+        pageSize,
+        total: data.totalElements,
+      }));
+    },
+    [selectedDataset, filesPagination.current, filesPagination.pageSize, filesSearch]
+  );
+
+  useEffect(() => {
+    // When the dataset changes, reset file pagination and fetch the first
+    // page directly, avoiding an extra render/request loop
+    if (selectedDataset) {
+      setFilesPagination({ current: 1, pageSize: 10, total: 0 });
+      // The backend page parameter is 0-based, so pass 0 for the first page
+      fetchFiles({ page: 0, pageSize: 10 }).catch(() => {});
+    } else {
+      setFiles([]);
+      setFilesPagination({ current: 1, pageSize: 10, total: 0 });
+    }
+    // Only re-run when selectedDataset changes
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [selectedDataset]);
+
+  useEffect(() => {
+    onDatasetSelect?.(selectedDataset);
+  }, [selectedDataset, onDatasetSelect]);
+
+  const handleSelectAllInDataset = useCallback(async () => {
+    if (!selectedDataset) {
+      message.warning("请先选择一个数据集");
+      return;
+    }
+
+    try {
+      setSelectingAll(true);
+
+      const pageSize = 1000; // fetch in batches to stay within the backend's per-page limit
+      let page = 0; // the backend page parameter is 0-based
+      let total = 0;
+      const allFiles: DatasetFile[] = [];
+
+      while (true) {
+        const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
+          page,
+          size: pageSize,
+        });
+
+        const content: DatasetFile[] = (data.content || []).map(
+          (item: DatasetFile) => ({
+            ...item,
+            key: String(item.id),
+            datasetName: selectedDataset.name,
+          }),
+        );
+
+        if (!content.length) {
+          break;
+        }
+
+        allFiles.push(...content);
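+        // Worked example of this loop's paging (illustrative numbers, not
+        // from the API): a dataset with 2,345 files and pageSize 1000 issues
+        // three requests -- page 0 returns 1000, page 1 returns 1000, and
+        // page 2 returns 345; because 345 < pageSize the loop stops without
+        // a fourth request. An empty dataset stops on the first empty page.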
+        // Prefer the backend's totalElements; otherwise fall back to the accumulated count
+        total = typeof data.totalElements === "number" ? data.totalElements : allFiles.length;
+
+        // If this page returned fewer items than pageSize we have everything;
+        // otherwise continue with the next page
+        if (content.length < pageSize) {
+          break;
+        }
+
+        page += 1;
+      }
+
+      const newMap: { [key: string]: DatasetFile } = { ...selectedFilesMap };
+      allFiles.forEach((file) => {
+        if (file && file.id != null) {
+          newMap[String(file.id)] = file;
+        }
+      });
+
+      onSelectedFilesChange(newMap);
+
+      const count = total || allFiles.length;
+      if (count > 0) {
+        message.success(`已选中当前数据集的全部 ${count} 个文件`);
+      } else {
+        message.info("当前数据集下没有可选文件");
+      }
+    } catch (error) {
+      console.error("Failed to select all files in dataset", error);
+      message.error("全选整个数据集失败,请稍后重试");
+    } finally {
+      setSelectingAll(false);
+    }
+  }, [selectedDataset, selectedFilesMap, onSelectedFilesChange]);
+
+  const toggleSelectFile = (record: DatasetFile) => {
+    if (!selectedFilesMap[record.id]) {
+      onSelectedFilesChange({
+        ...selectedFilesMap,
+        [record.id]: record,
+      });
+    } else {
+      const newSelectedFiles = { ...selectedFilesMap };
+      delete newSelectedFiles[record.id];
+      onSelectedFilesChange(newSelectedFiles);
+    }
+  };
+
+  useEffect(() => {
+    if (!open) {
+      // Reset all local state when the dialog closes
+      setDatasets([]);
+      setDatasetSearch("");
+      setDatasetPagination({ current: 1, pageSize: 10, total: 0 });
+      setFiles([]);
+      setFilesSearch("");
+      setFilesPagination({ current: 1, pageSize: 10, total: 0 });
+      setShowFiles(false);
+      setSelectedDataset(null);
+      setDatasetSelections([]);
+      onDatasetSelect?.(null);
+    }
+  }, [open, onDatasetSelect]);
+
+  const datasetCols = [
+    {
+      title: "数据集名称",
+      dataIndex: "name",
+      key: "name",
+      ellipsis: true,
+    },
+    {
+      title: "文件数",
+      dataIndex: "fileCount",
+      key: "fileCount",
+      ellipsis: true,
+    },
+    {
+      title: "大小",
+      dataIndex: "totalSize",
+      key: "totalSize",
+      ellipsis: true,
+      render: formatBytes,
+    },
+  ];
+
+  return (
+
+
+
+
选择数据集
+
+ setDatasetSearch(e.target.value)} + /> +
+
+ selectedDataset?.id === record.id ? "bg-blue-100" : "" + } + onRow={(record: Dataset) => ({ + onClick: () => { + setSelectedDataset(record); + if (!datasetSelections.find((d) => d.id === record.id)) { + setDatasetSelections([...datasetSelections, record]); + } else { + setDatasetSelections( + datasetSelections.filter((d) => d.id !== record.id) + ); + } + }, + })} + dataSource={datasets} + columns={datasetCols} + pagination={{ + ...datasetPagination, + onChange: (page, pageSize) => + setDatasetPagination({ + current: page, + pageSize: pageSize || datasetPagination.pageSize, + total: datasetPagination.total, + }), + }} + /> + + +
+
+ 选择文件 + +
+
+ setFilesSearch(e.target.value)} + /> +
+
+            <Table
+              rowKey={(record: DatasetFile) => String(record.id)}
+              size="small"
+              dataSource={files}
+              columns={fileCols.slice(1, fileCols.length)}
+              pagination={{
+                ...filesPagination,
+                onChange: (page, pageSize) => {
+                  const nextPageSize = pageSize || filesPagination.pageSize;
+                  setFilesPagination((prev) => ({
+                    ...prev,
+                    current: page,
+                    pageSize: nextPageSize,
+                  }));
+                  // The table's pagination is 1-based but the backend is 0-based, so pass page - 1
+                  fetchFiles({ page: page - 1, pageSize: nextPageSize }).catch(() => {});
+                },
+              }}
+              onRow={(record: DatasetFile) => ({
+                onClick: () => toggleSelectFile(record),
+              })}
+              rowSelection={{
+                type: "checkbox",
+                selectedRowKeys: Object.keys(selectedFilesMap),
+                preserveSelectedRowKeys: true,
+
+                // Single-row select
+                onSelect: (record: DatasetFile) => {
+                  toggleSelectFile(record);
+                },
+
+                // "Select all" now selects the entire dataset instead of the current page
+                onSelectAll: (selected, selectedRows: DatasetFile[]) => {
+                  if (selected) {
+                    // Clicking the header checkbox selects every file in the
+                    // current dataset in one step, not just the visible page
+                    handleSelectAllInDataset();
+                  } else {
+                    // Unchecking the header checkbox clears the whole selection
+                    onSelectedFilesChange({});
+                  }
+                },
+
+                getCheckboxProps: (record: DatasetFile) => ({
+                  name: record.fileName,
+                }),
+              }}
+            />
+
+
+
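+            {/*
+              Selection keys contract: selectedRowKeys holds the stringified
+              file ids (Object.keys of selectedFilesMap) and rowKey stringifies
+              record.id to match, so with preserveSelectedRowKeys the checkbox
+              state survives page changes and dataset switches.
+            */}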
+ + + ); +}; + +export default DatasetFileTransfer; diff --git a/frontend/src/hooks/useSliceUpload.tsx b/frontend/src/hooks/useSliceUpload.tsx index 44a3dca6..56b95178 100644 --- a/frontend/src/hooks/useSliceUpload.tsx +++ b/frontend/src/hooks/useSliceUpload.tsx @@ -1,187 +1,198 @@ -import { TaskItem } from "@/pages/DataManagement/dataset.model"; -import { calculateSHA256, checkIsFilesExist } from "@/utils/file.util"; -import { App } from "antd"; -import { useRef, useState } from "react"; - -export function useFileSliceUpload( - { - preUpload, - uploadChunk, - cancelUpload, - }: { - preUpload: (id: string, params: any) => Promise<{ data: number }>; - uploadChunk: (id: string, formData: FormData, config: any) => Promise; - cancelUpload: ((reqId: number) => Promise) | null; - }, - showTaskCenter = true // 上传时是否显示任务中心 -) { - const { message } = App.useApp(); - const [taskList, setTaskList] = useState([]); - const taskListRef = useRef([]); // 用于固定任务顺序 - - const createTask = (detail: any = {}) => { - const { dataset } = detail; - const title = `上传数据集: ${dataset.name} `; - const controller = new AbortController(); - const task: TaskItem = { - key: dataset.id, - title, - percent: 0, - reqId: -1, - controller, - size: 0, - updateEvent: detail.updateEvent, - hasArchive: detail.hasArchive, - }; - taskListRef.current = [task, ...taskListRef.current]; - - setTaskList(taskListRef.current); - return task; - }; - - const updateTaskList = (task: TaskItem) => { - taskListRef.current = taskListRef.current.map((item) => - item.key === task.key ? task : item - ); - setTaskList(taskListRef.current); - }; - - const removeTask = (task: TaskItem) => { - const { key } = task; - taskListRef.current = taskListRef.current.filter( - (item) => item.key !== key - ); - setTaskList(taskListRef.current); - if (task.isCancel && task.cancelFn) { - task.cancelFn(); - } - if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent)); - if (showTaskCenter) { - window.dispatchEvent( - new CustomEvent("show:task-popover", { detail: { show: false } }) - ); - } - }; - - async function buildFormData({ file, reqId, i, j }) { - const formData = new FormData(); - const { slices, name, size } = file; - const checkSum = await calculateSHA256(slices[j]); - formData.append("file", slices[j]); - formData.append("reqId", reqId.toString()); - formData.append("fileNo", (i + 1).toString()); - formData.append("chunkNo", (j + 1).toString()); - formData.append("fileName", name); - formData.append("fileSize", size.toString()); - formData.append("totalChunkNum", slices.length.toString()); - formData.append("checkSumHex", checkSum); - return formData; - } - - async function uploadSlice(task: TaskItem, fileInfo) { - if (!task) { - return; - } - const { reqId, key } = task; - const { loaded, i, j, files, totalSize } = fileInfo; - const formData = await buildFormData({ - file: files[i], - i, - j, - reqId, - }); - - let newTask = { ...task }; - await uploadChunk(key, formData, { - onUploadProgress: (e) => { - const loadedSize = loaded + e.loaded; - const curPercent = Number((loadedSize / totalSize) * 100).toFixed(2); - - newTask = { - ...newTask, - ...taskListRef.current.find((item) => item.key === key), - size: loadedSize, - percent: curPercent >= 100 ? 
99.99 : curPercent, - }; - updateTaskList(newTask); - }, - }); - } - - async function uploadFile({ task, files, totalSize }) { - const { data: reqId } = await preUpload(task.key, { - totalFileNum: files.length, - totalSize, - datasetId: task.key, - hasArchive: task.hasArchive, - }); - - const newTask: TaskItem = { - ...task, - reqId, - isCancel: false, - cancelFn: () => { - task.controller.abort(); - cancelUpload?.(reqId); - if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent)); - }, - }; - updateTaskList(newTask); - if (showTaskCenter) { - window.dispatchEvent( - new CustomEvent("show:task-popover", { detail: { show: true } }) - ); - } - // // 更新数据状态 - if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent)); - - let loaded = 0; - for (let i = 0; i < files.length; i++) { - const { slices } = files[i]; - for (let j = 0; j < slices.length; j++) { - await uploadSlice(newTask, { - loaded, - i, - j, - files, - totalSize, - }); - loaded += slices[j].size; - } - } - removeTask(newTask); - } - - const handleUpload = async ({ task, files }) => { - const isErrorFile = await checkIsFilesExist(files); - if (isErrorFile) { - message.error("文件被修改或删除,请重新选择文件上传"); - removeTask({ - ...task, - isCancel: false, - ...taskListRef.current.find((item) => item.key === task.key), - }); - return; - } - - try { - const totalSize = files.reduce((acc, file) => acc + file.size, 0); - await uploadFile({ task, files, totalSize }); - } catch (err) { - console.error(err); - message.error("文件上传失败,请稍后重试"); - removeTask({ - ...task, - isCancel: true, - ...taskListRef.current.find((item) => item.key === task.key), - }); - } - }; - - return { - taskList, - createTask, - removeTask, - handleUpload, - }; -} +import { TaskItem } from "@/pages/DataManagement/dataset.model"; +import { calculateSHA256, checkIsFilesExist } from "@/utils/file.util"; +import { App } from "antd"; +import { useRef, useState } from "react"; + +export function useFileSliceUpload( + { + preUpload, + uploadChunk, + cancelUpload, + }: { + preUpload: (id: string, params: any) => Promise<{ data: number }>; + uploadChunk: (id: string, formData: FormData, config: any) => Promise; + cancelUpload: ((reqId: number) => Promise) | null; + }, + showTaskCenter = true // 上传时是否显示任务中心 +) { + const { message } = App.useApp(); + const [taskList, setTaskList] = useState([]); + const taskListRef = useRef([]); // 用于固定任务顺序 + + const createTask = (detail: any = {}) => { + const { dataset } = detail; + const title = `上传数据集: ${dataset.name} `; + const controller = new AbortController(); + const task: TaskItem = { + key: dataset.id, + title, + percent: 0, + reqId: -1, + controller, + size: 0, + updateEvent: detail.updateEvent, + hasArchive: detail.hasArchive, + prefix: detail.prefix, + }; + taskListRef.current = [task, ...taskListRef.current]; + + setTaskList(taskListRef.current); + return task; + }; + + const updateTaskList = (task: TaskItem) => { + taskListRef.current = taskListRef.current.map((item) => + item.key === task.key ? 
task : item
+    );
+    setTaskList(taskListRef.current);
+  };
+
+  const removeTask = (task: TaskItem) => {
+    const { key } = task;
+    taskListRef.current = taskListRef.current.filter(
+      (item) => item.key !== key
+    );
+    setTaskList(taskListRef.current);
+    if (task.isCancel && task.cancelFn) {
+      task.cancelFn();
+    }
+    if (task.updateEvent) {
+      // Carry the prefix along so listeners can refresh and stay in the
+      // directory that was being viewed
+      window.dispatchEvent(
+        new CustomEvent(task.updateEvent, {
+          detail: { prefix: (task as any).prefix },
+        })
+      );
+    }
+    if (showTaskCenter) {
+      window.dispatchEvent(
+        new CustomEvent("show:task-popover", { detail: { show: false } })
+      );
+    }
+  };
+
+  async function buildFormData({ file, reqId, i, j }) {
+    const formData = new FormData();
+    const { slices, name, size } = file;
+    const checkSum = await calculateSHA256(slices[j]);
+    formData.append("file", slices[j]);
+    formData.append("reqId", reqId.toString());
+    formData.append("fileNo", (i + 1).toString());
+    formData.append("chunkNo", (j + 1).toString());
+    formData.append("fileName", name);
+    formData.append("fileSize", size.toString());
+    formData.append("totalChunkNum", slices.length.toString());
+    formData.append("checkSumHex", checkSum);
+    return formData;
+  }
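+
+  // buildFormData above is the per-chunk protocol: fileNo/chunkNo are 1-based,
+  // totalChunkNum tells the server how many parts to expect, and checkSumHex
+  // is a SHA-256 digest of the individual chunk so each part can be verified
+  // independently. The hook assumes callers pass files that already carry a
+  // `slices` array; a minimal sketch of such a slicing helper (illustrative
+  // only -- the real helper lives with the callers and may differ):
+  //
+  //   function sliceFile(file: File, chunkSize = 5 * 1024 * 1024): Blob[] {
+  //     const slices: Blob[] = [];
+  //     for (let start = 0; start < file.size; start += chunkSize) {
+  //       slices.push(file.slice(start, start + chunkSize));
+  //     }
+  //     return slices;
+  //   }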
+
+  async function uploadSlice(task: TaskItem, fileInfo) {
+    if (!task) {
+      return;
+    }
+    const { reqId, key } = task;
+    const { loaded, i, j, files, totalSize } = fileInfo;
+    const formData = await buildFormData({
+      file: files[i],
+      i,
+      j,
+      reqId,
+    });
+
+    let newTask = { ...task };
+    await uploadChunk(key, formData, {
+      onUploadProgress: (e) => {
+        const loadedSize = loaded + e.loaded;
+        // toFixed returns a string, so convert back to a number before comparing
+        const curPercent = Number(((loadedSize / totalSize) * 100).toFixed(2));
+
+        newTask = {
+          ...newTask,
+          ...taskListRef.current.find((item) => item.key === key),
+          size: loadedSize,
+          // Cap at 99.99 until the final chunk confirms completion
+          percent: curPercent >= 100 ? 99.99 : curPercent,
+        };
+        updateTaskList(newTask);
+      },
+    });
+  }
+
+  async function uploadFile({ task, files, totalSize }) {
+    console.log('[useSliceUpload] Calling preUpload with prefix:', task.prefix);
+    const { data: reqId } = await preUpload(task.key, {
+      totalFileNum: files.length,
+      totalSize,
+      datasetId: task.key,
+      hasArchive: task.hasArchive,
+      prefix: task.prefix,
+    });
+    console.log('[useSliceUpload] PreUpload response reqId:', reqId);
+
+    const newTask: TaskItem = {
+      ...task,
+      reqId,
+      isCancel: false,
+      cancelFn: () => {
+        task.controller.abort();
+        cancelUpload?.(reqId);
+        if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
+      },
+    };
+    updateTaskList(newTask);
+    if (showTaskCenter) {
+      window.dispatchEvent(
+        new CustomEvent("show:task-popover", { detail: { show: true } })
+      );
+    }
+    // Refresh the data state for listeners
+    if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
+
+    let loaded = 0;
+    for (let i = 0; i < files.length; i++) {
+      const { slices } = files[i];
+      for (let j = 0; j < slices.length; j++) {
+        await uploadSlice(newTask, {
+          loaded,
+          i,
+          j,
+          files,
+          totalSize,
+        });
+        loaded += slices[j].size;
+      }
+    }
+    removeTask(newTask);
+  }
+
+  const handleUpload = async ({ task, files }) => {
+    const isErrorFile = await checkIsFilesExist(files);
+    if (isErrorFile) {
+      message.error("文件被修改或删除,请重新选择文件上传");
+      removeTask({
+        ...task,
+        isCancel: false,
+        ...taskListRef.current.find((item) => item.key === task.key),
+      });
+      return;
+    }
+
+    try {
+      const totalSize = files.reduce((acc, file) => acc + file.size, 0);
+      await uploadFile({ task, files, totalSize });
+    } catch (err) {
+      console.error(err);
+      message.error("文件上传失败,请稍后重试");
+      removeTask({
+        ...task,
+        isCancel: true,
+        ...taskListRef.current.find((item) => item.key === task.key),
+      });
+    }
+  };
+
+  return {
+    taskList,
+    createTask,
+    removeTask,
+    handleUpload,
+  };
+}
diff --git a/frontend/src/pages/DataAnnotation/AutoAnnotation/AutoAnnotation.tsx b/frontend/src/pages/DataAnnotation/AutoAnnotation/AutoAnnotation.tsx
new file mode 100644
index 00000000..deedd54e
--- /dev/null
+++ b/frontend/src/pages/DataAnnotation/AutoAnnotation/AutoAnnotation.tsx
@@ -0,0 +1,302 @@
+import { useState, useEffect } from "react";
+import { Card, Button, Table, message, Modal, Tag, Progress, Space, Tooltip } from "antd";
+import {
+  PlusOutlined,
+  DeleteOutlined,
+  DownloadOutlined,
+  ReloadOutlined,
+  EyeOutlined,
+} from "@ant-design/icons";
+import type { ColumnType } from "antd/es/table";
+import type { AutoAnnotationTask, AutoAnnotationStatus } from "../annotation.model";
+import {
+  queryAutoAnnotationTasksUsingGet,
+  deleteAutoAnnotationTaskByIdUsingDelete,
+  downloadAutoAnnotationResultUsingGet,
+} from "../annotation.api";
+import CreateAutoAnnotationDialog from "./components/CreateAutoAnnotationDialog";
+
+const STATUS_COLORS: Record<AutoAnnotationStatus, string> = {
+  pending: "default",
+  running: "processing",
+  completed: "success",
+  failed: "error",
+  cancelled: "default",
+};
+
+const STATUS_LABELS: Record<AutoAnnotationStatus, string> = {
+  pending: "等待中",
+  running: "处理中",
+  completed: "已完成",
+  failed: "失败",
+  cancelled: "已取消",
+};
+
+const MODEL_SIZE_LABELS: Record<string, string> = {
+  n: "YOLOv8n (最快)",
+  s: "YOLOv8s",
+  m: "YOLOv8m",
+  l: "YOLOv8l (推荐)",
+  x: "YOLOv8x (最精确)",
+};
+
+export default function AutoAnnotation() {
+  const [loading, setLoading] = useState(false);
+  const [tasks, setTasks] = useState<AutoAnnotationTask[]>([]);
+  const [showCreateDialog, setShowCreateDialog] = useState(false);
+  const [selectedRowKeys, setSelectedRowKeys] = useState<string[]>([]);
+
+  useEffect(() => {
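+    // Poll every 3 seconds so the status and progress columns track running
+    // tasks; the interval refresh passes silent=true so periodic refetches
+    // skip the loading spinner and error toasts, and the cleanup below clears
+    // the timer on unmount.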
+ fetchTasks(); + const interval = setInterval(() => { + fetchTasks(true); + }, 3000); + return () => clearInterval(interval); + }, []); + + const fetchTasks = async (silent = false) => { + if (!silent) setLoading(true); + try { + const response = await queryAutoAnnotationTasksUsingGet(); + setTasks(response.data || response || []); + } catch (error) { + console.error("Failed to fetch auto annotation tasks:", error); + if (!silent) message.error("获取任务列表失败"); + } finally { + if (!silent) setLoading(false); + } + }; + + const handleDelete = (task: AutoAnnotationTask) => { + Modal.confirm({ + title: `确认删除自动标注任务「${task.name}」吗?`, + content: "删除任务后,已生成的标注结果不会被删除。", + okText: "删除", + okType: "danger", + cancelText: "取消", + onOk: async () => { + try { + await deleteAutoAnnotationTaskByIdUsingDelete(task.id); + message.success("任务删除成功"); + fetchTasks(); + setSelectedRowKeys((keys) => keys.filter((k) => k !== task.id)); + } catch (error) { + console.error(error); + message.error("删除失败,请稍后重试"); + } + }, + }); + }; + + const handleDownload = async (task: AutoAnnotationTask) => { + try { + message.loading("正在准备下载...", 0); + await downloadAutoAnnotationResultUsingGet(task.id); + message.destroy(); + message.success("下载已开始"); + } catch (error) { + console.error(error); + message.destroy(); + message.error("下载失败"); + } + }; + + const handleViewResult = (task: AutoAnnotationTask) => { + if (task.outputPath) { + Modal.info({ + title: "标注结果路径", + content: ( +
+

输出路径:{task.outputPath}

+

检测对象数:{task.detectedObjects}

+

+ 处理图片数:{task.processedImages} / {task.totalImages} +

+
+ ), + }); + } + }; + + const columns: ColumnType[] = [ + { title: "任务名称", dataIndex: "name", key: "name", width: 200 }, + { + title: "数据集", + dataIndex: "datasetName", + key: "datasetName", + width: 220, + render: (_: any, record: AutoAnnotationTask) => { + const list = + record.sourceDatasets && record.sourceDatasets.length > 0 + ? record.sourceDatasets + : record.datasetName + ? [record.datasetName] + : []; + + if (list.length === 0) return "-"; + + const text = list.join(","); + return ( + + {text} + + ); + }, + }, + { + title: "模型", + dataIndex: ["config", "modelSize"], + key: "modelSize", + width: 120, + render: (size: string) => MODEL_SIZE_LABELS[size] || size, + }, + { + title: "置信度", + dataIndex: ["config", "confThreshold"], + key: "confThreshold", + width: 100, + render: (threshold: number) => `${(threshold * 100).toFixed(0)}%`, + }, + { + title: "目标类别", + dataIndex: ["config", "targetClasses"], + key: "targetClasses", + width: 120, + render: (classes: number[]) => ( + 0 ? classes.join(", ") : "全部类别"} + > + + {classes.length > 0 + ? `${classes.length} 个类别` + : "全部类别"} + + + ), + }, + { + title: "状态", + dataIndex: "status", + key: "status", + width: 100, + render: (status: AutoAnnotationStatus) => ( + {STATUS_LABELS[status]} + ), + }, + { + title: "进度", + dataIndex: "progress", + key: "progress", + width: 150, + render: (progress: number, record: AutoAnnotationTask) => ( +
+ +
+ {record.processedImages} / {record.totalImages} +
+
+ ), + }, + { + title: "检测对象数", + dataIndex: "detectedObjects", + key: "detectedObjects", + width: 100, + render: (count: number) => count.toLocaleString(), + }, + { + title: "创建时间", + dataIndex: "createdAt", + key: "createdAt", + width: 150, + render: (time: string) => new Date(time).toLocaleString(), + }, + { + title: "操作", + key: "actions", + width: 180, + fixed: "right", + render: (_: any, record: AutoAnnotationTask) => ( + + {record.status === "completed" && ( + <> + + + + + } + > +
setSelectedRowKeys(keys as string[]), + }} + pagination={{ pageSize: 10 }} + scroll={{ x: 1000 }} + /> + + + setShowCreateDialog(false)} + onSuccess={() => { + setShowCreateDialog(false); + fetchTasks(); + }} + /> + + ); +} \ No newline at end of file diff --git a/frontend/src/pages/DataAnnotation/AutoAnnotation/components/CreateAutoAnnotationDialog.tsx b/frontend/src/pages/DataAnnotation/AutoAnnotation/components/CreateAutoAnnotationDialog.tsx new file mode 100644 index 00000000..2e9adbec --- /dev/null +++ b/frontend/src/pages/DataAnnotation/AutoAnnotation/components/CreateAutoAnnotationDialog.tsx @@ -0,0 +1,286 @@ +import { useState, useEffect } from "react"; +import { Modal, Form, Input, Select, Slider, message, Checkbox } from "antd"; +import { createAutoAnnotationTaskUsingPost } from "../../annotation.api"; +import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api"; +import { mapDataset } from "@/pages/DataManagement/dataset.const"; +import { DatasetType, type DatasetFile, type Dataset } from "@/pages/DataManagement/dataset.model"; +import DatasetFileTransfer from "@/components/business/DatasetFileTransfer"; + +const { Option } = Select; + +interface CreateAutoAnnotationDialogProps { + visible: boolean; + onCancel: () => void; + onSuccess: () => void; +} + +const COCO_CLASSES = [ + { id: 0, name: "person", label: "人" }, + { id: 1, name: "bicycle", label: "自行车" }, + { id: 2, name: "car", label: "汽车" }, + { id: 3, name: "motorcycle", label: "摩托车" }, + { id: 4, name: "airplane", label: "飞机" }, + { id: 5, name: "bus", label: "公交车" }, + { id: 6, name: "train", label: "火车" }, + { id: 7, name: "truck", label: "卡车" }, + { id: 8, name: "boat", label: "船" }, + { id: 9, name: "traffic light", label: "红绿灯" }, + { id: 10, name: "fire hydrant", label: "消防栓" }, + { id: 11, name: "stop sign", label: "停止标志" }, + { id: 12, name: "parking meter", label: "停车计时器" }, + { id: 13, name: "bench", label: "长椅" }, + { id: 14, name: "bird", label: "鸟" }, + { id: 15, name: "cat", label: "猫" }, + { id: 16, name: "dog", label: "狗" }, + { id: 17, name: "horse", label: "马" }, + { id: 18, name: "sheep", label: "羊" }, + { id: 19, name: "cow", label: "牛" }, + { id: 20, name: "elephant", label: "大象" }, + { id: 21, name: "bear", label: "熊" }, + { id: 22, name: "zebra", label: "斑马" }, + { id: 23, name: "giraffe", label: "长颈鹿" }, + { id: 24, name: "backpack", label: "背包" }, + { id: 25, name: "umbrella", label: "雨伞" }, + { id: 26, name: "handbag", label: "手提包" }, + { id: 27, name: "tie", label: "领带" }, + { id: 28, name: "suitcase", label: "行李箱" }, + { id: 29, name: "frisbee", label: "飞盘" }, + { id: 30, name: "skis", label: "滑雪板" }, + { id: 31, name: "snowboard", label: "滑雪板" }, + { id: 32, name: "sports ball", label: "球类" }, + { id: 33, name: "kite", label: "风筝" }, + { id: 34, name: "baseball bat", label: "棒球棒" }, + { id: 35, name: "baseball glove", label: "棒球手套" }, + { id: 36, name: "skateboard", label: "滑板" }, + { id: 37, name: "surfboard", label: "冲浪板" }, + { id: 38, name: "tennis racket", label: "网球拍" }, + { id: 39, name: "bottle", label: "瓶子" }, + { id: 40, name: "wine glass", label: "酒杯" }, + { id: 41, name: "cup", label: "杯子" }, + { id: 42, name: "fork", label: "叉子" }, + { id: 43, name: "knife", label: "刀" }, + { id: 44, name: "spoon", label: "勺子" }, + { id: 45, name: "bowl", label: "碗" }, + { id: 46, name: "banana", label: "香蕉" }, + { id: 47, name: "apple", label: "苹果" }, + { id: 48, name: "sandwich", label: "三明治" }, + { id: 49, name: "orange", label: "橙子" }, + { id: 50, name: "broccoli", label: "西兰花" 
},
+  { id: 51, name: "carrot", label: "胡萝卜" },
+  { id: 52, name: "hot dog", label: "热狗" },
+  { id: 53, name: "pizza", label: "披萨" },
+  { id: 54, name: "donut", label: "甜甜圈" },
+  { id: 55, name: "cake", label: "蛋糕" },
+  { id: 56, name: "chair", label: "椅子" },
+  { id: 57, name: "couch", label: "沙发" },
+  { id: 58, name: "potted plant", label: "盆栽" },
+  { id: 59, name: "bed", label: "床" },
+  { id: 60, name: "dining table", label: "餐桌" },
+  { id: 61, name: "toilet", label: "马桶" },
+  { id: 62, name: "tv", label: "电视" },
+  { id: 63, name: "laptop", label: "笔记本电脑" },
+  { id: 64, name: "mouse", label: "鼠标" },
+  { id: 65, name: "remote", label: "遥控器" },
+  { id: 66, name: "keyboard", label: "键盘" },
+  { id: 67, name: "cell phone", label: "手机" },
+  { id: 68, name: "microwave", label: "微波炉" },
+  { id: 69, name: "oven", label: "烤箱" },
+  { id: 70, name: "toaster", label: "烤面包机" },
+  { id: 71, name: "sink", label: "水槽" },
+  { id: 72, name: "refrigerator", label: "冰箱" },
+  { id: 73, name: "book", label: "书" },
+  { id: 74, name: "clock", label: "钟表" },
+  { id: 75, name: "vase", label: "花瓶" },
+  { id: 76, name: "scissors", label: "剪刀" },
+  { id: 77, name: "teddy bear", label: "玩具熊" },
+  { id: 78, name: "hair drier", label: "吹风机" },
+  { id: 79, name: "toothbrush", label: "牙刷" },
+];
+
+// Shared by the file counter and the submit handler so the two filters
+// cannot drift apart
+const IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"];
+
+export default function CreateAutoAnnotationDialog({
+  visible,
+  onCancel,
+  onSuccess,
+}: CreateAutoAnnotationDialogProps) {
+  const [form] = Form.useForm();
+  const [loading, setLoading] = useState(false);
+  const [datasets, setDatasets] = useState<Dataset[]>([]);
+  const [selectAllClasses, setSelectAllClasses] = useState(true);
+  const [selectedFilesMap, setSelectedFilesMap] = useState<Record<string, DatasetFile>>({});
+  const [selectedDataset, setSelectedDataset] = useState<Dataset | null>(null);
+  const [imageFileCount, setImageFileCount] = useState(0);
+
+  useEffect(() => {
+    if (visible) {
+      fetchDatasets();
+      form.resetFields();
+      form.setFieldsValue({
+        modelSize: "l",
+        confThreshold: 0.7,
+        targetClasses: [],
+      });
+    }
+  }, [visible, form]);
+
+  const fetchDatasets = async () => {
+    try {
+      const { data } = await queryDatasetsUsingGet({
+        page: 0,
+        pageSize: 1000,
+      });
+      const imageDatasets = (data.content || [])
+        .map(mapDataset)
+        .filter((ds: any) => ds.datasetType === DatasetType.IMAGE);
+      setDatasets(imageDatasets);
+    } catch (error) {
+      console.error("Failed to fetch datasets:", error);
+      message.error("获取数据集列表失败");
+    }
+  };
+
+  useEffect(() => {
+    // Count only the image files among the current selection, matched by extension
+    const count = Object.values(selectedFilesMap).filter((file) => {
+      const ext = file.fileName?.toLowerCase().match(/\.[^.]+$/)?.[0] || "";
+      return IMAGE_EXTENSIONS.includes(ext);
+    }).length;
+    setImageFileCount(count);
+  }, [selectedFilesMap]);
+
+  const handleSubmit = async () => {
+    try {
+      const values = await form.validateFields();
+
+      if (imageFileCount === 0) {
+        message.error("请至少选择一个图像文件");
+        return;
+      }
+
+      setLoading(true);
+
+      const imageFileIds = Object.values(selectedFilesMap)
+        .filter((file) => {
+          const ext = file.fileName?.toLowerCase().match(/\.[^.]+$/)?.[0] || "";
+          return IMAGE_EXTENSIONS.includes(ext);
+        })
+        .map((file) => file.id);
+
+      const payload = {
+        name: values.name,
+        datasetId: values.datasetId,
+        fileIds: imageFileIds,
+        config: {
+          modelSize: values.modelSize,
+          confThreshold: values.confThreshold,
+          targetClasses: selectAllClasses ?
[] : values.targetClasses || [], + outputDatasetName: values.outputDatasetName || undefined, + }, + }; + + await createAutoAnnotationTaskUsingPost(payload); + message.success("自动标注任务创建成功"); + onSuccess(); + } catch (error: any) { + if (error.errorFields) return; + console.error("Failed to create auto annotation task:", error); + message.error(error.message || "创建任务失败"); + } finally { + setLoading(false); + } + }; + + const handleClassSelectionChange = (checked: boolean) => { + setSelectAllClasses(checked); + if (checked) { + form.setFieldsValue({ targetClasses: [] }); + } + }; + + return ( + +
+ + + + + + { + setSelectedDataset(dataset); + form.setFieldsValue({ datasetId: dataset?.id ?? "" }); + }} + datasetTypeFilter={DatasetType.IMAGE} + /> + {selectedDataset && ( +
+ 当前数据集:{selectedDataset.name} - 已选择 + {imageFileCount} 个图像文件 +
+ )} +
+ + + + + + + + + `${(v || 0) * 100}%` }} /> + + + + handleClassSelectionChange(e.target.checked)}> + 选中所有类别 + + {!selectAllClasses && ( + + + + )} + + + + + + +
+ ); +} \ No newline at end of file diff --git a/frontend/src/pages/DataAnnotation/AutoAnnotation/index.ts b/frontend/src/pages/DataAnnotation/AutoAnnotation/index.ts new file mode 100644 index 00000000..fd1dd1b6 --- /dev/null +++ b/frontend/src/pages/DataAnnotation/AutoAnnotation/index.ts @@ -0,0 +1 @@ +export { default } from "./AutoAnnotation"; \ No newline at end of file diff --git a/frontend/src/pages/DataAnnotation/Create/components/CreateAnnotationTaskDialog.tsx b/frontend/src/pages/DataAnnotation/Create/components/CreateAnnotationTaskDialog.tsx index 8652fb1b..2c186c88 100644 --- a/frontend/src/pages/DataAnnotation/Create/components/CreateAnnotationTaskDialog.tsx +++ b/frontend/src/pages/DataAnnotation/Create/components/CreateAnnotationTaskDialog.tsx @@ -1,192 +1,489 @@ -import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api"; -import { mapDataset } from "@/pages/DataManagement/dataset.const"; -import { Button, Form, Input, Modal, Select, message } from "antd"; -import TextArea from "antd/es/input/TextArea"; -import { useEffect, useState } from "react"; -import { createAnnotationTaskUsingPost, queryAnnotationTemplatesUsingGet } from "../../annotation.api"; -import { Dataset } from "@/pages/DataManagement/dataset.model"; -import type { AnnotationTemplate } from "../../annotation.model"; - -export default function CreateAnnotationTask({ - open, - onClose, - onRefresh, -}: { - open: boolean; - onClose: () => void; - onRefresh: () => void; -}) { - const [form] = Form.useForm(); - const [datasets, setDatasets] = useState([]); - const [templates, setTemplates] = useState([]); - const [submitting, setSubmitting] = useState(false); - const [nameManuallyEdited, setNameManuallyEdited] = useState(false); - - useEffect(() => { - if (!open) return; - const fetchData = async () => { - try { - // Fetch datasets - const { data: datasetData } = await queryDatasetsUsingGet({ - page: 0, - pageSize: 1000, // Use camelCase for HTTP params - }); - setDatasets(datasetData.content.map(mapDataset) || []); - - // Fetch templates - const templateResponse = await queryAnnotationTemplatesUsingGet({ - page: 1, - size: 100, // Backend max is 100 (template API uses 'size' not 'pageSize') - }); - - // The API returns: {code, message, data: {content, total, page, ...}} - if (templateResponse.code === 200 && templateResponse.data) { - const fetchedTemplates = templateResponse.data.content || []; - console.log("Fetched templates:", fetchedTemplates); - setTemplates(fetchedTemplates); - } else { - console.error("Failed to fetch templates:", templateResponse); - setTemplates([]); - } - } catch (error) { - console.error("Error fetching data:", error); - setTemplates([]); - } - }; - fetchData(); - }, [open]); - - // Reset form and manual-edit flag when modal opens - useEffect(() => { - if (open) { - form.resetFields(); - setNameManuallyEdited(false); - } - }, [open, form]); - - const handleSubmit = async () => { - try { - const values = await form.validateFields(); - setSubmitting(true); - // Send templateId instead of labelingConfig - const requestData = { - name: values.name, - description: values.description, - datasetId: values.datasetId, - templateId: values.templateId, - }; - await createAnnotationTaskUsingPost(requestData); - message?.success?.("创建标注任务成功"); - onClose(); - onRefresh(); - } catch (err: any) { - console.error("Create annotation task failed", err); - const msg = err?.message || err?.data?.message || "创建失败,请稍后重试"; - (message as any)?.error?.(msg); - } finally { - 
setSubmitting(false); - } - }; - - return ( - - - - - } - width={800} - > -
- {/* 数据集 与 标注工程名称 并排显示(数据集在左) */} -
- - setNameManuallyEdited(true)} - /> - -
- {/* 描述变为可选 */} - -